From 9ccef7fa04d615df374390140fcfeaf700f36494 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Fri, 29 Oct 2021 12:17:10 +0000
Subject: [PATCH] add paddle tts vs espnet tts demos

---
 docs/source/_static/custom.css             |   2 +-
 docs/source/tts/demo.rst                   | 269 +++++++++++++++++++-
 docs/source/tts/demo_2.rst                 | 280 +++++++++++++++++++++
 docs/source/tts/test_sentence.txt          |  14 ++
 parakeet/models/fastspeech2/fastspeech2.py |  25 +-
 5 files changed, 577 insertions(+), 13 deletions(-)
 create mode 100644 docs/source/tts/test_sentence.txt
diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css
index 2987ae04..bb65c51a 100644
--- a/docs/source/_static/custom.css
+++ b/docs/source/_static/custom.css
@@ -2,4 +2,4 @@
     max-width: 80%;
 }
 .table table{ background:#b9b9b9} 
-.table table td{ background:#FFF} 
+.table table td{ background:#FFF; } 
diff --git a/docs/source/tts/demo.rst b/docs/source/tts/demo.rst
index 20208d2c..09c4d25a 100644
--- a/docs/source/tts/demo.rst
+++ b/docs/source/tts/demo.rst
@@ -248,7 +248,8 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
         </tr>    
     </table>
     </div>
-    
+    <br>
+    <br>
 
 TTS
 -------------------
@@ -633,10 +634,264 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
             </td>
         </tr>   
     </table>
-
     </div>
+    <br>
+    <br>
+
+
+Multi-Speaker TTS
+-------------------
+
+PaddleSpeech also supports Multi-Speaker TTS. We provide audio demos generated by FastSpeech2 + ParallelWaveGAN, using the AISHELL-3 Multi-Speaker TTS dataset.
+
 
 
+.. raw:: html
+
+    <div class="table">
+    <table border="2" cellspacing="1" cellpadding="1">
+        <tr>
+            <th align="center"> Text </th>
+            <th align="center"> Origin </th>
+            <th align="center"> Generated </th>
+        </tr>
+    </table>
+    </div>
+    <br>
+    <br>
+        
+
+Duration control in FastSpeech2
+--------------------------------------
+In our FastSpeech2, we can control ``duration``, ``pitch`` and ``energy``. We provide audio demos of duration control here. ``duration`` means the duration of phonemes: when we reduce ``duration``, the speed of the audio increases, and when we increase ``duration``, the speed of the audio decreases.
+
+The ``duration`` of different phonemes in a sentence can have different scale ratios (when you want to slow down one word and keep the other words' speed in a sentence). Here we use a fixed scale ratio for different phonemes to control the ``speed`` of audios.
+
+Duration control in FastSpeech2 changes the speed of the audio while keeping the pitch unchanged. (In some speech tools, increasing the speed also raises the pitch, and vice versa.)
+
+.. raw:: html
+
+    <div class="table">
+    <table border="2" cellspacing="1" cellpadding="1">
+        <tr>
+            <th align="center"> Speed(0.8x) </th>
+            <th align="center"> Speed(1x) </th>
+            <th align="center"> Speed(1.2x) </th>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_002.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_002.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_002.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_003.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_003.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_003.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_004.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_004.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_004.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_005.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_005.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_005.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_007.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_007.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_007.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_008.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_008.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_008.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+             <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_009.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_009.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_009.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+
+    </table>
+    </div>
+    <br>
+    <br>
+
 
 Chinese TTS with/without text frontend
 --------------------------------------
@@ -650,9 +905,9 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
     <div class="table">
     <table border="2" cellspacing="1" cellpadding="1">
         <tr>
-            <th align="left"> Text</th>
-            <th align="left"> With Text Frontend </th>
-            <th align="left"> Without Text Frontend </th>
+            <th align="center"> Text</th>
+            <th align="center"> With Text Frontend </th>
+            <th align="center"> Without Text Frontend </th>
         </tr>
         <tr>
             <td>他只是一个纸老虎。</td>
@@ -846,6 +1101,8 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
         </tr>
 
     <table>
-    </div>  
+    </div>
+    <br>
+    <br> 
 
    
\ No newline at end of file
diff --git a/docs/source/tts/demo_2.rst b/docs/source/tts/demo_2.rst
index 37922fcb..2f0ca7cd 100644
--- a/docs/source/tts/demo_2.rst
+++ b/docs/source/tts/demo_2.rst
@@ -5,3 +5,283 @@ This is an audio demo page to contrast PaddleSpeech TTS and Espnet TTS, We use t
 We use Espnet's released models here.
 
 FastSpeech2 + Parallel WaveGAN in CSMSC
+
+.. raw:: html
+     
+    
+    <div class="table">
+    <table border="2" cellspacing="1" cellpadding="1"> 
+        <tr>
+            <th align="center"> Text </th>
+            <th align="center"> Espnet TTS </th>
+            <th align="center"> PaddleSpeech TTS </th>
+        </tr>
+        <tr>
+            <td>早上好，今天是2020/10/29，最低温度是-3°C。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>你好，我的编号是37249，很高兴为您服务。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/002.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/002.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>我们公司有37249个人。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/003.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/003.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>我出生于2005年10月8日。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/004.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/004.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>我们习惯在12:30吃中午饭。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/005.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/005.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>只要有超过3/4的人投票同意，你就会成为我们的新班长。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/006.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/006.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>我要买一只价值999.9元的手表。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/007.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/007.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>我的手机号是18544139121，欢迎来电。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/008.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/008.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>明天有62%的概率降雨。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/009.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/009.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>手表厂有五种好产品。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/010.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/010.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>跑马场有五百匹很勇敢的千里马。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/011.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/011.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>有一天，我看到了一栋楼，我顿感不妙，因为我看不清里面有没有人。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/012.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/012.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>史小姐拿着小雨伞去找她的老保姆了。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/013.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/013.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>不要相信这个老奶奶说的话，她一点儿也不好。</td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/014.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/014.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        </table>
+        </div>
+
diff --git a/docs/source/tts/test_sentence.txt b/docs/source/tts/test_sentence.txt
new file mode 100644
index 00000000..933f4749
--- /dev/null
+++ b/docs/source/tts/test_sentence.txt
@@ -0,0 +1,14 @@
+001 早上好，今天是2020/10/29，最低温度是-3°C。
+002 你好，我的编号是37249，很高兴为您服务。
+003 我们公司有37249个人。
+004 我出生于2005年10月8日。
+005 我们习惯在12:30吃中午饭。
+006 只要有超过3/4的人投票同意，你就会成为我们的新班长。
+007 我要买一只价值999.9元的手表。
+008 我的手机号是18544139121，欢迎来电。
+009 明天有62%的概率降雨。
+010 手表厂有五种好产品。
+011 跑马场有五百匹很勇敢的千里马。
+012 有一天，我看到了一栋楼，我顿感不妙，因为我看不清里面有没有人。
+013 史小姐拿着小雨伞去找她的老保姆了。
+014 不要相信这个老奶奶说的话，她一点儿也不好。
\ No newline at end of file
diff --git a/parakeet/models/fastspeech2/fastspeech2.py b/parakeet/models/fastspeech2/fastspeech2.py
index 7c0e20bc..bde3a82b 100644
--- a/parakeet/models/fastspeech2/fastspeech2.py
+++ b/parakeet/models/fastspeech2/fastspeech2.py
@@ -420,9 +420,18 @@ class FastSpeech2(nn.Layer):
 
         if is_inference:
             # (B, Tmax)
-            d_outs = self.duration_predictor.inference(hs, d_masks)
+            if ds is not None:
+                d_outs = ds
+            else:
+                d_outs = self.duration_predictor.inference(hs, d_masks)
+            if ps is not None:
+                p_outs = ps
+            if es is not None:
+                e_outs = es
+
             # use prediction in inference
             # (B, Tmax, 1)
+
             p_embs = self.pitch_embed(p_outs.transpose((0, 2, 1))).transpose(
                 (0, 2, 1))
             e_embs = self.energy_embed(e_outs.transpose((0, 2, 1))).transpose(
@@ -513,7 +522,7 @@ class FastSpeech2(nn.Layer):
         x = paddle.cast(text, 'int64')
         y = speech
         spemb = spembs
-        if durations:
+        if durations is not None:
             d = paddle.cast(durations, 'int64')
         p, e = pitch, energy
         # setup batch axis
@@ -531,9 +540,12 @@ class FastSpeech2(nn.Layer):
 
         if use_teacher_forcing:
             # use groundtruth of duration, pitch, and energy
-            ds, ps, es = d.unsqueeze(0), p.unsqueeze(0), e.unsqueeze(0)
+            ds = d.unsqueeze(0) if d is not None else None
+            ps = p.unsqueeze(0) if p is not None else None
+            es = e.unsqueeze(0) if e is not None else None
+            # ds, ps, es = , p.unsqueeze(0), e.unsqueeze(0)
             # (1, L, odim)
-            _, outs, *_ = self._forward(
+            _, outs, d_outs, *_ = self._forward(
                 xs,
                 ilens,
                 ys,
@@ -542,10 +554,11 @@ class FastSpeech2(nn.Layer):
                 es=es,
                 spembs=spembs,
                 spk_id=spk_id,
-                tone_id=tone_id)
+                tone_id=tone_id,
+                is_inference=True)
         else:
             # (1, L, odim)
-            _, outs, *_ = self._forward(
+            _, outs, d_outs, *_ = self._forward(
                 xs,
                 ilens,
                 ys,

Speed(0.8x)	Speed(1x)	Speed(1.2x)
+ +	+ +	+ +
+ +	+ +	+ +
+ +	+ +	+ +
+ +	+ +	+ +
+ +	+ +	+ +
+ +	+ +	+ +
+ +	+ +	+ +
+ +	+ +	+ +
Text	Espent TTS	PaddleSpeech TTS
早上好，今天是2020/10/29，最低温度是-3°C。	+ +	+ +
你好，我的编号是37249，很高兴为您服务。	+ +	+ +
我们公司有37249个人。	+ +	+ +
我出生于2005年10月8日。	+ +	+ +
我们习惯在12:30吃中午饭。	+ +	+ +
只要有超过3/4的人投票同意，你就会成为我们的新班长。	+ +	+ +
我要买一只价值999.9元的手表。	+ +	+ +
我的手机号是18544139121，欢迎来电。	+ +	+ +
明天有62%的概率降雨。	+ +	+ +
手表厂有五种好产品。	+ +	+ +
跑马场有五百匹很勇敢的千里马。	+ +	+ +
有一天，我看到了一栋楼，我顿感不妙，因为我看不清里面有没有人。	+ +	+ +
史小姐拿着小雨伞去找她的老保姆了。	+ +	+ +
不要相信这个老奶奶说的话，她一点儿也不好。	+ +	+ +