diff --git a/demos/text_to_speech/README.md b/demos/text_to_speech/README.md
index 41dcf820b..0db18f46b 100644
--- a/demos/text_to_speech/README.md
+++ b/demos/text_to_speech/README.md
@@ -58,7 +58,14 @@ The input of this demo should be a text of the specific language that can be pas
         paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
         paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
         ```
-     - Use ONNXRuntime infer：
+    - Chinese English Mixed, single male speaker
+        ```bash
+        # male mix tts
+        # The `lang` must be `mix`!
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_pwgan.wav
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_hifigan.wav
+        ```
+    - Use ONNXRuntime infer：
         ```bash
         paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output default.wav --use_onnx True
         paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output ss.wav --use_onnx True
@@ -70,7 +77,14 @@ The input of this demo should be a text of the specific language that can be pas
         paddlespeech tts --am fastspeech2_ljspeech --voc hifigan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output lj_fs2_hifigan.wav --use_onnx True
         paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_pwgan.wav --use_onnx True
         paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_hifigan.wav --use_onnx True
-         ```
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_pwgan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_pwgan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_pwgan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_hifigan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_hifigan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_hifigan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --spk_id 174 --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output mix_fs2_pwgan_csmsc_spk174.wav --use_onnx True
+        ```
 
   Usage:
   
@@ -161,6 +175,9 @@ Here is a list of pretrained models released by PaddleSpeech that can be used by
   |       fastspeech2_mix        |   mix    |
   |       tacotron2_csmsc        |    zh    |
   |      tacotron2_ljspeech      |    en    |
+  |       fastspeech2_male       |    zh    |
+  |       fastspeech2_male       |    en    |
+  |       fastspeech2_male       |   mix    |
 
 - Vocoder
   | Model | Language |
@@ -176,3 +193,5 @@ Here is a list of pretrained models released by PaddleSpeech that can be used by
   |       hifigan_aishell3       |    zh    |
   |         hifigan_vctk         |    en    |
   |        wavernn_csmsc         |    zh    |
+  |         pwgan_male           |    zh    |
+  |        hifigan_male          |    zh    |
diff --git a/demos/text_to_speech/README_cn.md b/demos/text_to_speech/README_cn.md
index 4a4132238..250d56e24 100644
--- a/demos/text_to_speech/README_cn.md
+++ b/demos/text_to_speech/README_cn.md
@@ -58,7 +58,14 @@
         paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
         paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
         ```
-     - 使用 ONNXRuntime 推理：
+    - 中英文混合，单个男性说话人
+        ```bash
+        # male mix tts
+        # The `lang` must be `mix`!
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_pwgan.wav
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_hifigan.wav
+        ```
+    - 使用 ONNXRuntime 推理：
         ```bash
         paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output default.wav --use_onnx True
         paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output ss.wav --use_onnx True
@@ -70,7 +77,14 @@
         paddlespeech tts --am fastspeech2_ljspeech --voc hifigan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output lj_fs2_hifigan.wav --use_onnx True
         paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_pwgan.wav --use_onnx True
         paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_hifigan.wav --use_onnx True
-         ```
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_pwgan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_pwgan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_pwgan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_hifigan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_hifigan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_hifigan.wav --use_onnx True
+        paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --spk_id 174 --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output mix_fs2_pwgan_csmsc_spk174.wav --use_onnx True
+        ```
 
   使用方法：
   
@@ -161,6 +175,10 @@
   |       fastspeech2_mix        |   mix    |
   |       tacotron2_csmsc        |    zh    |
   |      tacotron2_ljspeech      |    en    |
+  |       fastspeech2_male       |    zh    |
+  |       fastspeech2_male       |    en    |
+  |       fastspeech2_male       |   mix    |
+  
 
 - 声码器
   | 模型 | 语言 |
@@ -176,3 +194,5 @@
   |       hifigan_aishell3       |    zh    |
   |         hifigan_vctk         |    en    |
   |        wavernn_csmsc         |    zh    |
+  |         pwgan_male           |    zh    |
+  |        hifigan_male          |    zh    |
diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
index e95c85744..6334211a0 100644
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -522,11 +522,10 @@ class TTSExecutor(BaseExecutor):
             text=text,
             merge_sentences=merge_sentences,
             get_tone_ids=get_tone_ids,
-            lang=lang, )
+            lang=lang,
+            to_tensor=False, )
         self.frontend_time = time.time() - frontend_st
         phone_ids = frontend_dict['phone_ids']
-        # onnx need numpy data as input 
-        phone_ids = [phone_id.numpy() for phone_id in phone_ids]
         self.am_time = 0
         self.voc_time = 0
         flags = 0
diff --git a/paddlespeech/t2s/frontend/mix_frontend.py b/paddlespeech/t2s/frontend/mix_frontend.py
index c13a5ab62..b8c16097c 100644
--- a/paddlespeech/t2s/frontend/mix_frontend.py
+++ b/paddlespeech/t2s/frontend/mix_frontend.py
@@ -15,6 +15,7 @@ import re
 from typing import Dict
 from typing import List
 
+import numpy as np
 import paddle
 
 from paddlespeech.t2s.frontend import English
@@ -32,6 +33,7 @@ class MixFrontend():
             phone_vocab_path=phone_vocab_path, tone_vocab_path=tone_vocab_path)
         self.en_frontend = English(phone_vocab_path=phone_vocab_path)
         self.sp_id = self.zh_frontend.vocab_phones["sp"]
+        self.sp_id_numpy = np.array([self.sp_id])
         self.sp_id_tensor = paddle.to_tensor([self.sp_id])
 
     def is_chinese(self, char):
@@ -108,7 +110,6 @@ class MixFrontend():
                       get_tone_ids: bool=False,
                       add_sp: bool=True,
                       to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
-
         ''' 1. 添加SSML支持，先列出 文字 和 <say-as>标签内容，
                 然后添加到tmpSegments数组里
         '''
@@ -120,7 +121,6 @@ class MixFrontend():
                 tmpSegments.append((instr, "zh"))
             else:
                 tmpSegments.extend(self.get_segment(instr))
-
         ''' 2. 把zh的merge到一起，避免合成结果中间停顿
         '''
         segments = []
@@ -171,8 +171,12 @@ class MixFrontend():
                             get_tone_ids=get_tone_ids,
                             to_tensor=to_tensor)
                 if add_sp:
-                    input_ids["phone_ids"][-1] = paddle.concat(
-                        [input_ids["phone_ids"][-1], self.sp_id_tensor])
+                    if to_tensor:
+                        input_ids["phone_ids"][-1] = paddle.concat(
+                            [input_ids["phone_ids"][-1], self.sp_id_tensor])
+                    else:
+                        input_ids["phone_ids"][-1] = np.concatenate(
+                            (input_ids["phone_ids"][-1], self.sp_id_numpy))
 
                 for phones in input_ids["phone_ids"]:
                     phones_list.append(phones)
@@ -181,7 +185,8 @@ class MixFrontend():
             merge_list = paddle.concat(phones_list)
             # rm the last 'sp' to avoid the noise at the end
             # cause in the training data, no 'sp' in the end
-            if merge_list[-1] == self.sp_id_tensor:
+            if (to_tensor and merge_list[-1] == self.sp_id_tensor) or (
+                    not to_tensor and merge_list[-1] == self.sp_id_numpy):
                 merge_list = merge_list[:-1]
             phones_list = []
             phones_list.append(merge_list)
diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh
index 5d3b76f6c..6b5252683 100755
--- a/tests/unit/cli/test_cli.sh
+++ b/tests/unit/cli/test_cli.sh
@@ -59,7 +59,9 @@ paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like
 paddlespeech tts --am tacotron2_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"
 paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"
 paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get."
-paddlespeech tts --am fastspeech2_male --voc pwgan_male --input "你好，欢迎使用百度飞桨深度学习框架！"
+paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！"
+paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get."
+
 # mix tts
 # The `am` must be `fastspeech2_mix`!
 # The `lang` must be `mix`!
@@ -70,6 +72,8 @@ paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input
 paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
 paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
 
+# male mix tts
+paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_pwgan.wav
 
 # Speech Translation (only support linux)
 paddlespeech st --input ./en.wav