From c759fef0aacefdcb89daa37f8ed39e471fd91e9a Mon Sep 17 00:00:00 2001
From: huangyuxin <hyxin2014@126.com>
Date: Mon, 13 Dec 2021 06:38:16 +0000
Subject: [PATCH 01/23] move pypi-kenlm from install requirements to develop
 requirements

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1ac671f1c..a5b773edf 100644
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,6 @@ requirements = {
         "paddleaudio",
         "paddlespeech_feat",
         "praatio~=4.1",
-        "pypi-kenlm",
         "pypinyin",
         "python-dateutil",
         "pyworld",
@@ -71,6 +70,7 @@ requirements = {
         "phkit",
         "Pillow",
         "pybind11",
+        "pypi-kenlm",
         "snakeviz",
         "sox",
         "soxbindings",

From 9e31a606d10a3b34e8b236637f01b3257e786ed0 Mon Sep 17 00:00:00 2001
From: Jackwaterveg <87408988+Jackwaterveg@users.noreply.github.com>
Date: Mon, 13 Dec 2021 14:46:20 +0800
Subject: [PATCH 02/23] set default encoding utf8 for win (#1101)

Co-authored-by: KP <109694228@qq.com>
---
 paddlespeech/cli/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py
index 99a53c37e..c82168aee 100644
--- a/paddlespeech/cli/__init__.py
+++ b/paddlespeech/cli/__init__.py
@@ -11,9 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import _locale
 from .asr import ASRExecutor
 from .base_commands import BaseCommand
 from .base_commands import HelpCommand
 from .cls import CLSExecutor
 from .st import STExecutor
 from .tts import TTSExecutor
+
+_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])

From 7acf62d208b0092b5fb4163fdd6497409b1cc063 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 13 Dec 2021 15:25:40 +0800
Subject: [PATCH 03/23] fix release model (#1106)

---
 docs/source/released_model.md | 48 ++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/docs/source/released_model.md b/docs/source/released_model.md
index 58650e593..1b61ccc7b 100644
--- a/docs/source/released_model.md
+++ b/docs/source/released_model.md
@@ -2,32 +2,31 @@
 
 ## Speech-to-Text Models
 
-### Acoustic Model Released in paddle 2.X
-Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech | example link
+### Speech Recognition Model
+Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech | Example Link 
 :-------------:| :------------:| :-----: | -----: | :----------------- |:--------- | :---------- | :--------- | :-----------
-[Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/aishell_ds2_online_cer8.00_release.tar.gz) | Aishell Dataset | Char-based | 345 MB  | 2 Conv + 5 LSTM layers with only forward direction | 0.080 |-| 151 h | [D2 Online Aishell S0 Example](../../examples/aishell/asr0)
-[Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/ds2.model.tar.gz)| Aishell Dataset | Char-based | 306 MB | 2 Conv + 3 bidirectional GRU layers| 0.064 |-| 151 h | [Ds2 Offline Aishell S0 Example](../../examples/aishell/asr0)
-[Conformer Online Aishell ASR1 Model](https://deepspeech.bj.bcebos.com/release2.1/aishell/s1/aishell.chunk.release.tar.gz) | Aishell Dataset | Char-based | 283 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0594 |-| 151 h | [Conformer Online Aishell S1 Example](../../examples/aishell/s1)
-[Conformer Offline Aishell ASR1 Model](https://deepspeech.bj.bcebos.com/release2.1/aishell/s1/aishell.release.tar.gz) | Aishell Dataset | Char-based | 284 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0547 |-| 151 h | [Conformer Offline Aishell S1 Example](../../examples/aishell/s1)
-[Conformer Librispeech ASR1 Model](https://deepspeech.bj.bcebos.com/release2.1/librispeech/s1/conformer.release.tar.gz) | Librispeech Dataset | subword-based | 287 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0325 | 960 h | [Conformer Librispeech S1 example](../../example/librispeech/s1)
-[Transformer Librispeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/transformer.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB  | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0410 | 960 h | [Transformer Librispeech S1 example](../../example/librispeech/s1)
-[Transformer Librispeech ASR2 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr2/transformer.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB  | Encoder:Transformer, Decoder:Transformer, Decoding method: JoinCTC w/ LM |-| 0.024 | 960 h | [Transformer Librispeech S2 example](../../example/librispeech/s2)
-
-
-### Acoustic Model Transformed from paddle 1.8
-Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech
-:-------------:| :------------:| :-----: | -----: | :----------------- | :---------- | :---------- | :---------
-[Ds2 Offline Aishell model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz)|Aishell Dataset| Char-based| 234 MB| 2 Conv + 3 bidirectional GRU layers| 0.0804 |-| 151 h|
-[Ds2 Offline Librispeech model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz)|Librispeech Dataset| Word-based| 307 MB| 2 Conv + 3 bidirectional sharing weight RNN layers |-| 0.0685| 960 h|
-[Ds2 Offline Baidu en8k model](https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz)|Baidu Internal English Dataset| Word-based| 273 MB| 2 Conv + 3 bidirectional GRU layers |-| 0.0541 | 8628 h|
-
-### Language Model Released
+[Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/aishell_ds2_online_cer8.00_release.tar.gz) | Aishell Dataset | Char-based | 345 MB  | 2 Conv + 5 LSTM layers with only forward direction | 0.080 |-| 151 h | [Ds2 Online Aishell ASR0](../../examples/aishell/asr0) 
+[Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/ds2.model.tar.gz)| Aishell Dataset | Char-based | 306 MB | 2 Conv + 3 bidirectional GRU layers| 0.064 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) 
+[Conformer Online Aishell ASR1 Model](https://deepspeech.bj.bcebos.com/release2.1/aishell/s1/aishell.chunk.release.tar.gz) | Aishell Dataset | Char-based | 283 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0594 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) 
+[Conformer Offline Aishell ASR1 Model](https://deepspeech.bj.bcebos.com/release2.1/aishell/s1/aishell.release.tar.gz) | Aishell Dataset | Char-based | 284 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0547 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) 
+[Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/transformer.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0538 |-| 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) 
+[Conformer Librispeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/conformer.model.tar.gz) | Librispeech Dataset | subword-based | 191 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0337 | 960 h | [Conformer Librispeech ASR1](../../examples/librispeech/asr1) 
+[Transformer Librispeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/transformer.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB  | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0381 | 960 h | [Transformer Librispeech ASR1](../../examples/librispeech/asr1) 
+[Transformer Librispeech ASR2 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr2/transformer.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB  | Encoder:Transformer, Decoder:Transformer, Decoding method: JoinCTC w/ LM |-| 0.0240 | 960 h | [Transformer Librispeech ASR2](../../examples/librispeech/asr2) 
+
+### Language Model based on NGram
 Language Model | Training Data | Token-based | Size | Descriptions
 :-------------:| :------------:| :-----: | -----: | :-----------------
 [English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) |  [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; <br/> About 1.85 billion n-grams; <br/> 'trie'  binary with '-a 22 -q 8 -b 8'
 [Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; <br/> About 0.13 billion n-grams; <br/> 'probing' binary with default settings
 [Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; <br/> About 3.7 billion n-grams; <br/> 'probing' binary with default settings
 
+### Speech Translation Models
+
+| Model                                                        | Training Data | Token-based | Size | Descriptions                                                 | BLEU  | Example Link                                                 |
+| ------------------------------------------------------------ | ------------- | ----------- | ---- | ------------------------------------------------------------ | ----- | ------------------------------------------------------------ |
+| [Transformer FAT-ST MTL En-Zh](https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/fat_st_ted-en-zh.tar.gz) | Ted-En-Zh     | Spm         |      | Encoder:Transformer, Decoder:Transformer, <br />Decoding method: Attention | 20.80 | [Transformer Ted-En-Zh ST1](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/ted_en_zh/st1) |
+
 
 ## Text-to-Speech Models
 
@@ -69,8 +68,11 @@ PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset
 PANN | ESC-50 |[pann-esc50]("./examples/esc50/cls0")|[panns_cnn6.tar.gz](https://paddlespeech.bj.bcebos.com/cls/panns_cnn6.tar.gz), [panns_cnn10](https://paddlespeech.bj.bcebos.com/cls/panns_cnn10.tar.gz), [panns_cnn14.tar.gz](https://paddlespeech.bj.bcebos.com/cls/panns_cnn14.tar.gz)
 
 
-## Speech Translation Models
+## Speech Recognition Model from paddle 1.8
+
+|                        Acoustic Model                        |         Training Data          | Token-based |   Size | Descriptions                                       | CER    | WER    | Hours of speech |
+| :----------------------------------------------------------: | :----------------------------: | :---------: | -----: | :------------------------------------------------- | :----- | :----- | :-------------- |
+| [Ds2 Offline Aishell model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz) |        Aishell Dataset         | Char-based  | 234 MB | 2 Conv + 3 bidirectional GRU layers                | 0.0804 | -      | 151 h           |
+| [Ds2 Offline Librispeech model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz) |      Librispeech Dataset       | Word-based  | 307 MB | 2 Conv + 3 bidirectional sharing weight RNN layers | -      | 0.0685 | 960 h           |
+| [Ds2 Offline Baidu en8k model](https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz) | Baidu Internal English Dataset | Word-based  | 273 MB | 2 Conv + 3 bidirectional GRU layers                | -      | 0.0541 | 8628 h          |
 
-Model Type | Dataset| Example Link | Pretrained Models | Model Size
-:-------------:| :------------:| :-----: | :-----: | :-----:
-FAT-ST | TED En-Zh |[FAT + Transformer+ASR MTL](./examples/ted_en_zh/st1)|[fat_st_ted-en-zh.tar.gz](https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/fat_st_ted-en-zh.tar.gz) | 50.26M

From c30549f0a7865a6f459361be38638a8eec34173a Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 16:02:21 +0800
Subject: [PATCH 04/23] Update README.md

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index ca0e0b461..7ccbe4c5c 100644
--- a/README.md
+++ b/README.md
@@ -221,15 +221,15 @@ The current hyperlinks redirect to [Previous Parakeet](https://github.com/Paddle
 <table>
   <thead>
     <tr>
-      <th> Text-to-Speech Module Type <img width="110" height="1"> </th>
-      <th>  Model Type  </th>
-      <th> <img width="50" height="1"> Dataset  <img width="50" height="1"> </th>
-      <th> <img width="101" height="1"> Link <img width="105" height="1"> </th>
+      <th> Text-to-Speech Module Type </th>
+      <th> Model Type </th>
+      <th> Dataset </th>
+      <th> Link </th>
     </tr>
   </thead>
   <tbody>
     <tr>
-    <td> Text Frontend</td>
+    <td> Text Frontend </td>
     <td colspan="2"> &emsp; </td>
     <td>
     <a href = "./examples/other/tn">tn</a> / <a href = "./examples/other/g2p">g2p</a>
@@ -315,10 +315,10 @@ The current hyperlinks redirect to [Previous Parakeet](https://github.com/Paddle
 <table style="width:100%">
   <thead>
     <tr>
-      <th> <img width="150" height="1">Task <img width="150" height="1"></th>
-      <th> <img width="110" height="1">Dataset <img width="110" height="1"></th>
-      <th> <img width="110" height="1">Model Type <img width="110" height="1"></th>
-      <th> <img width="110" height="1">Link <img width="110" height="1"></th>
+      <th> Task </th>
+      <th> Dataset </th>
+      <th> Model Type </th>
+      <th> Link </th>
     </tr>
   </thead>
   <tbody>

From 2a1cbf2d85e4f517aae2b8b7ea8ff080bdcbbef2 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 16:45:41 +0800
Subject: [PATCH 05/23] Update README.md

---
 demos/speech_translation/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demos/speech_translation/README.md b/demos/speech_translation/README.md
index 8bb322c52..caca05dd1 100644
--- a/demos/speech_translation/README.md
+++ b/demos/speech_translation/README.md
@@ -19,7 +19,7 @@ Here are sample files for this demo that can be downloaded:
 wget https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
 ```
 
-### 3. Usage
+### 3. Usage (not supported on Windows for now)
 - Command Line(Recommended)
   ```bash
   paddlespeech st --input ./en.wav

From 7883e2c22ec1c8a1aa089fcbdaecacab1000282b Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 16:47:24 +0800
Subject: [PATCH 06/23] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 7ccbe4c5c..1c94914fa 100644
--- a/README.md
+++ b/README.md
@@ -139,6 +139,7 @@ paddlespeech cls --input input.wav
 paddlespeech asr --lang zh --input input_16k.wav
 ```
 **Speech Translation** (English to Chinese)
+ (not support for Windows now)
 ```shell
 paddlespeech st --input input_16k.wav
 ```

From 965a57ef0e8db2411708db1292f497e7461f5a6d Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 16:48:24 +0800
Subject: [PATCH 07/23] Update README.md

---
 paddlespeech/cli/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddlespeech/cli/README.md b/paddlespeech/cli/README.md
index 25f1f718b..34466ec2f 100644
--- a/paddlespeech/cli/README.md
+++ b/paddlespeech/cli/README.md
@@ -17,6 +17,8 @@
  ```
  
  ## Speech Translation (English to Chinese)
+ 
+ (not supported on Windows for now)
  ```bash
  paddlespeech st --input input_16k.wav
  ```

From e98e80e82ab94ba4a89b141dd7078e42679fdf59 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 16:49:23 +0800
Subject: [PATCH 08/23] Update README.md

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1c94914fa..a1d6777e1 100644
--- a/README.md
+++ b/README.md
@@ -139,7 +139,8 @@ paddlespeech cls --input input.wav
 paddlespeech asr --lang zh --input input_16k.wav
 ```
 **Speech Translation** (English to Chinese)
- (not support for Windows now)
+
+(not supported on Windows for now)
 ```shell
 paddlespeech st --input input_16k.wav
 ```

From ca12a83d5a8a228e7bf04b0928a30cc61df870a0 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 11:36:28 +0000
Subject: [PATCH 09/23] update voc name

---
 paddlespeech/cli/tts/infer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
index 771b7d6dc..2684e9edf 100644
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -465,7 +465,7 @@ class TTSExecutor(BaseExecutor):
 
         # vocoder
         # model: {model_name}_{dataset}
-        voc_name = '_'.join(voc.split('_')[:-1])
+        voc_name = voc[:voc.rindex('_')]
         voc_class = dynamic_import(voc_name, model_alias)
         voc_inference_class = dynamic_import(voc_name + '_inference',
                                              model_alias)

From 84025c5ffef97080b8012be9c0f190b91ed3281f Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 19:46:05 +0800
Subject: [PATCH 10/23] Rename READEME.md to README.md

---
 examples/aishell/asr1/{READEME.md => README.md} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename examples/aishell/asr1/{READEME.md => README.md} (100%)

diff --git a/examples/aishell/asr1/READEME.md b/examples/aishell/asr1/README.md
similarity index 100%
rename from examples/aishell/asr1/READEME.md
rename to examples/aishell/asr1/README.md

From 9db1710ba78c18185e5180f366ff8e5e3d70b5e2 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Mon, 13 Dec 2021 20:08:43 +0800
Subject: [PATCH 11/23] add conformer demos (#1108)

---
 docs/source/tts/demo.rst                 | 101 +++++++++++++++++++++++
 paddlespeech/t2s/frontend/zh_frontend.py |   4 +
 2 files changed, 105 insertions(+)

diff --git a/docs/source/tts/demo.rst b/docs/source/tts/demo.rst
index 4c2f86b14..ca2fd98e4 100644
--- a/docs/source/tts/demo.rst
+++ b/docs/source/tts/demo.rst
@@ -455,6 +455,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
     <b>CSMSC(Chinese)</b>
     <br>
     </br>
+
     <table border="2" cellspacing="1" cellpadding="1"> 
         <tr>
             <th align="center"> Text </th>
@@ -634,6 +635,106 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
             </td>
         </tr>   
     </table>
+
+    <br>
+    </br>
+
+    <table border="2" cellspacing="1" cellpadding="1"> 
+        <tr>
+            <th align="center"> FastSpeech2-Conformer + ParallelWaveGAN </th>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/002.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/003.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/004.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/005.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/006.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/007.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/008.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>
+                <audio controls="controls">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/009.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>   
+    </table>
     </div>
     <br>
     <br>
diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
index b59060a36..8eb55ff25 100644
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -137,6 +137,10 @@ class Frontend():
             phones_list.append(phones)
         if merge_sentences:
             merge_list = sum(phones_list, [])
+            # Remove the trailing 'sp' to avoid noise at the end of the audio,
+            # because the training data never ends with 'sp'.
+            if merge_list[-1] == 'sp':
+                merge_list = merge_list[:-1]
             phones_list = []
             phones_list.append(merge_list)
         return phones_list

From cce45cbcdaf5fbd1dee1b7c1b9880a5d1fdf618d Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 13 Dec 2021 18:20:22 -0500
Subject: [PATCH 12/23] add HF badge

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index a1d6777e1..b071ed803 100644
--- a/README.md
+++ b/README.md
@@ -149,6 +149,8 @@ paddlespeech st --input input_16k.wav
 paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
+- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/paddlespeech)
+  
 If you want to try more functions like training and tuning, please have a look at [Speech-to-Text Quick Start](./docs/source/asr/quick_start.md) and [Text-to-Speech Quick Start](./docs/source/tts/quick_start.md).
 
 ## Model List

From 3de4130dfcb02d6dfd0af6234991532405679bb0 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Tue, 14 Dec 2021 02:34:44 +0000
Subject: [PATCH 13/23] update am name

---
 paddlespeech/cli/tts/infer.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
index 2684e9edf..65d6d5282 100644
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -403,8 +403,6 @@ class TTSExecutor(BaseExecutor):
         with open(self.voc_config) as f:
             self.voc_config = CfgNode(yaml.safe_load(f))
 
-        # Enter the path of model root
-
         with open(self.phones_dict, "r") as f:
             phn_id = [line.strip().split() for line in f.readlines()]
         vocab_size = len(phn_id)
@@ -499,10 +497,10 @@ class TTSExecutor(BaseExecutor):
         """
         Model inference and result stored in self.output.
         """
-        model_name = am[:am.rindex('_')]
-        dataset = am[am.rindex('_') + 1:]
+        am_name = am[:am.rindex('_')]
+        am_dataset = am[am.rindex('_') + 1:]
         get_tone_ids = False
-        if 'speedyspeech' in model_name:
+        if am_name == 'speedyspeech':
             get_tone_ids = True
         if lang == 'zh':
             input_ids = self.frontend.get_input_ids(
@@ -519,15 +517,14 @@ class TTSExecutor(BaseExecutor):
             print("lang should in {'zh', 'en'}!")
 
         # am
-        if 'speedyspeech' in model_name:
+        if am_name == 'speedyspeech':
             mel = self.am_inference(phone_ids, tone_ids)
         # fastspeech2
         else:
             # multi speaker
-            if dataset in {"aishell3", "vctk"}:
+            if am_dataset in {"aishell3", "vctk"}:
                 mel = self.am_inference(
                     phone_ids, spk_id=paddle.to_tensor(spk_id))
-
             else:
                 mel = self.am_inference(phone_ids)
 

From 9986b435c1b6d192285013e4bb0e4a390ad56297 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 13 Dec 2021 21:57:09 -0500
Subject: [PATCH 14/23] move badge

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b071ed803..f8b17c578 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
+[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/paddlespeech)
 
 <!---
 from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readmes-readable.md
@@ -149,7 +150,7 @@ paddlespeech st --input input_16k.wav
 paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
-- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/paddlespeech)
+- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio).
   
 If you want to try more functions like training and tuning, please have a look at [Speech-to-Text Quick Start](./docs/source/asr/quick_start.md) and [Text-to-Speech Quick Start](./docs/source/tts/quick_start.md).
 

From 4ab488ad9d8d86b207930a66d1d921b22d51d5fe Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 13 Dec 2021 22:20:44 -0500
Subject: [PATCH 15/23] add back link

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f8b17c578..881c5e9bf 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,8 @@ paddlespeech st --input input_16k.wav
 paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
-- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio).
+- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See Demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/paddlespeech)
+
   
 If you want to try more functions like training and tuning, please have a look at [Speech-to-Text Quick Start](./docs/source/asr/quick_start.md) and [Text-to-Speech Quick Start](./docs/source/tts/quick_start.md).
 

From b96c76a7aeb152d8dd0f2df80f187f194e173099 Mon Sep 17 00:00:00 2001
From: AK391 <81195143+AK391@users.noreply.github.com>
Date: Mon, 13 Dec 2021 22:27:11 -0500
Subject: [PATCH 16/23] changes

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 881c5e9bf..cc47b0752 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
-[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/paddlespeech)
+[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)]()
 
 <!---
 from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readmes-readable.md
@@ -150,7 +150,7 @@ paddlespeech st --input input_16k.wav
 paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
-- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See Demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/paddlespeech)
+- web demo for Text to Speech is integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See Demo: https://huggingface.co/spaces/akhaliq/paddlespeech
 
   
 If you want to try more functions like training and tuning, please have a look at [Speech-to-Text Quick Start](./docs/source/asr/quick_start.md) and [Text-to-Speech Quick Start](./docs/source/tts/quick_start.md).

From 1138c9dae754e878c73cf9bf7f3723d091bd3d33 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Tue, 14 Dec 2021 11:30:19 +0800
Subject: [PATCH 17/23] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cc47b0752..56c823c69 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
-[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)]()
+[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)]
 
 <!---
 from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readmes-readable.md

From 8cdbe3a6c0fe447e54cfbcfd82139d2869f5fc49 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Tue, 14 Dec 2021 11:31:11 +0800
Subject: [PATCH 18/23] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 56c823c69..48a490851 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
-[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)]
+![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)
 
 <!---
 from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readmes-readable.md

From 5f0f76f249714f8d845ecd89926d9c83c2309a33 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Tue, 14 Dec 2021 14:07:47 +0800
Subject: [PATCH 19/23] add eval() for inference model (#1114)

---
 paddlespeech/cli/tts/infer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
index fdf93e2c2..b3733e059 100644
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -461,6 +461,7 @@ class TTSExecutor(BaseExecutor):
         am_std = paddle.to_tensor(am_std)
         am_normalizer = ZScore(am_mu, am_std)
         self.am_inference = am_inference_class(am_normalizer, am)
+        self.am_inference.eval()
         print("acoustic model done!")
 
         # vocoder
@@ -478,6 +479,7 @@ class TTSExecutor(BaseExecutor):
         voc_std = paddle.to_tensor(voc_std)
         voc_normalizer = ZScore(voc_mu, voc_std)
         self.voc_inference = voc_inference_class(voc_normalizer, voc)
+        self.voc_inference.eval()
         print("voc done!")
 
     def preprocess(self, input: Any, *args, **kwargs):

From 65f684806e0fc38dade04e823d07dc69bffc6425 Mon Sep 17 00:00:00 2001
From: Mingxue-Xu <92848346+Mingxue-Xu@users.noreply.github.com>
Date: Tue, 14 Dec 2021 16:40:08 +0800
Subject: [PATCH 20/23] [DOCS] Correct the grammar and spelling mistakes of
 install.md (#1115)

---
 docs/source/install.md | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/docs/source/install.md b/docs/source/install.md
index 3eb175322..3bf477018 100644
--- a/docs/source/install.md
+++ b/docs/source/install.md
@@ -1,8 +1,8 @@
 # Installation
-There are 3 ways to use `PaddleSpeech`. According to the degree of difficulty, the 3 ways can be divided into `Easy`, `Medium` and `Hard`.
+There are 3 ways to use `PaddleSpeech`. According to the degree of difficulty, the 3 ways can be divided into **Easy**, **Medium** and **Hard**.
 
-## Easy: Get the Basic Funcition Without Your Own Mechine
-If you are a newer of `PaddleSpeech` and want to experience it easily without your own mechine. We recommand you to use [AI Studio](https://aistudio.baidu.com/aistudio/index) to experience it. There is a step-by-step tutorial for `PaddleSpeech` and you can use the basic function of `PaddleSpeech` with a free machine.
+## Easy: Get the Basic Function without Your Own Machine
+If you are new to `PaddleSpeech` and want to experience it easily without your own machine, we recommend that you use [AI Studio](https://aistudio.baidu.com/aistudio/index). There is a step-by-step tutorial for `PaddleSpeech`, and you can use the basic functions of `PaddleSpeech` with a free machine.
 
 ## Prerequisites for Medium and Hard
 - Python >= 3.7
@@ -10,11 +10,11 @@ If you are a newer of `PaddleSpeech` and want to experience it easily without yo
 - Only Linux is supported
 - Hip: Do not use command `sh` instead of command `bash`
 
-## Medium: Get the Basic Funciton on Your Mechine
-If you want to install `paddlespeech` on your own mechine. There are 3 steps you need to do.
+## Medium: Get the Basic Function on Your Machine
+If you want to install `paddlespeech` on your own machine, there are 3 steps you need to follow.
 
-### Install the Conda
-Conda is environment management system. You can go to [minicoda](https://docs.conda.io/en/latest/miniconda.html) to select a version (py>=3.7) and install it by yourself or you can use the following command:
+### Install Conda
+Conda is an environment management system. You can go to [miniconda](https://docs.conda.io/en/latest/miniconda.html) to select a version (py>=3.7) and install it yourself, or you can use the following command:
 ```bash
 # download the miniconda
 wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
@@ -43,30 +43,27 @@ For example, for CUDA 10.2, CuDNN7.5 install paddle 2.2.0:
 python3 -m pip install paddlepaddle-gpu==2.2.0
 ```
 ### Install PaddleSpeech 
-To Install  `paddlespeech`, there are two methods. You can use the following command:
+To install  `paddlespeech`, there are two methods. You can use the following command:
 ```bash
 pip install paddlespeech
 ```
-If you install `paddlespeech` by `pip`, you can use it to help you build your own model. However, you can not use the `ready-made `examples in paddlespeech. 
+If you install `paddlespeech` by `pip`, you can use it to help you build your model. However, you cannot use the `ready-made` examples in paddlespeech.
 
-If you want to use the` ready-made `examples in `paddlespeech`, you need to clone this repository and install  `paddlespeech`  by the foll
+If you want to use the `ready-made` examples in `paddlespeech`, you need to clone this repository and install `paddlespeech` by the following commands:
 ```bash
 https://github.com/PaddlePaddle/PaddleSpeech.git
 cd PaddleSpeech
 pip install .
 ```
-## Hard: Get the Full Funciton on Your Mechine
+## Hard: Get the Full Function on Your Machine
 ### Prerequisites
 - choice 1: working with `Ubuntu` Docker Container.
-
-  or
-
 - choice 2: working on `Ubuntu` with `root` privilege. 
 
-To avoid the trouble of environment setup, [running in Docker container](#running-in-docker-container) is highly recommended. Otherwise If you work on `Ubuntu` with `root` privilege, you can skip the next step.
+To avoid the trouble of environment setup, [running in Docker container](#running-in-docker-container) is highly recommended. Otherwise, if you work on `Ubuntu` with `root` privilege, you can skip the next step.
 
 ### Choice 1: Running in Docker Container (Recommand)
-Docker is an open source tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed. This Docker image requires the support of NVIDIA GPU, so please make sure its availiability and the [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed.
+Docker is an open-source tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed. This Docker image requires the support of NVIDIA GPU, so please make sure that a GPU is available and that [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed.
 
 Take several steps to launch the Docker image:
 - Download the Docker image
@@ -115,7 +112,7 @@ For example, for CUDA 10.2, CuDNN7.5 install paddle 2.2.0:
 ```bash
 python3 -m pip install paddlepaddle-gpu==2.2.0
 ```
-### Get the Funcition for Developing PaddleSpeech
+### Get the Function for Developing PaddleSpeech
 ```bash
 pip install .[develop]
 ```

From c4a79ccea4e017e070bec99f35ceca4d948563c2 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Tue, 14 Dec 2021 17:27:48 +0800
Subject: [PATCH 21/23] [asr] update librispeech conformer result (#1116)

* update librispeech result

* change conf order
---
 examples/librispeech/asr1/RESULTS.md          | 13 ++--
 .../asr1/conf/chunk_conformer.yaml            | 71 ++++++++---------
 .../asr1/conf/chunk_transformer.yaml          | 69 ++++++++--------
 examples/librispeech/asr1/conf/conformer.yaml | 78 ++++++++-----------
 4 files changed, 105 insertions(+), 126 deletions(-)

diff --git a/examples/librispeech/asr1/RESULTS.md b/examples/librispeech/asr1/RESULTS.md
index 1aba73d1c..d5f5a9a46 100644
--- a/examples/librispeech/asr1/RESULTS.md
+++ b/examples/librispeech/asr1/RESULTS.md
@@ -1,15 +1,18 @@
 # LibriSpeech
 
 ## Conformer
+train: Epoch 70, 4 V100-32G, best avg: 20
+
 | Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
 | --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention | 6.738649845123291 | 0.041159 |  
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | 6.738649845123291 | 0.039847 |  
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | 6.738649845123291 | 0.039790 |  
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | 6.738649845123291 | 0.034617 |  
+| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | attention | 6.433612394332886 | 0.039771 |  
+| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | ctc_greedy_search | 6.433612394332886 | 0.040342 |  
+| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | ctc_prefix_beam_search | 6.433612394332886 | 0.040342 |  
+| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | attention_rescoring | 6.433612394332886 | 0.033761 |  
 
 
 ## Chunk Conformer
+
 | Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | WER |  
 | --- | --- | --- | --- | --- | --- | --- | --- | --- |  
 | conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention | 16, -1 | 7.11 | 0.063193 |  
@@ -20,7 +23,7 @@
 
 ## Transformer
 
-train: Epoch 120, 4 V100-32G, 27 Day, avg: 10
+train: Epoch 120, 4 V100-32G, 27 Day, best avg: 10
 
 | Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
 | --- | --- | --- | --- | --- | --- | --- | --- |
diff --git a/examples/librispeech/asr1/conf/chunk_conformer.yaml b/examples/librispeech/asr1/conf/chunk_conformer.yaml
index 7f5930378..2872b69ef 100644
--- a/examples/librispeech/asr1/conf/chunk_conformer.yaml
+++ b/examples/librispeech/asr1/conf/chunk_conformer.yaml
@@ -1,41 +1,3 @@
-# https://yaml.org/type/float.html
-data:
-  train_manifest: data/manifest.train
-  dev_manifest: data/manifest.dev
-  test_manifest: data/manifest.test
-  min_input_len: 0.5
-  max_input_len: 30.0
-  min_output_len: 0.0
-  max_output_len: 400.0
-  min_output_input_ratio: 0.05
-  max_output_input_ratio: 100.0
-
-collator:
-  vocab_filepath: data/lang_char/vocab.txt 
-  unit_type: 'spm'
-  spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
-  mean_std_filepath: ""
-  augmentation_config: conf/preprocess.yaml
-  batch_size: 16
-  raw_wav: True  # use raw_wav or kaldi feature
-  spectrum_type: fbank #linear, mfcc, fbank
-  feat_dim: 80
-  delta_delta: False
-  dither: 1.0
-  target_sample_rate: 16000
-  max_freq: None
-  n_fft: None
-  stride_ms: 10.0
-  window_ms: 25.0
-  use_dB_normalization: True
-  target_dB: -20
-  random_seed: 0
-  keep_transcription_text: False
-  sortagrad: True 
-  shuffle_method: batch_shuffle
-  num_workers: 2
-
-
 # network architecture
 model:
     cmvn_file: 
@@ -80,6 +42,39 @@ model:
         length_normalized_loss: false
 
 
+data:
+  train_manifest: data/manifest.train
+  dev_manifest: data/manifest.dev
+  test_manifest: data/manifest.test
+
+
+collator:
+  vocab_filepath: data/lang_char/vocab.txt 
+  unit_type: 'spm'
+  spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
+  mean_std_filepath: ""
+  augmentation_config: conf/preprocess.yaml
+  batch_size: 16
+  raw_wav: True  # use raw_wav or kaldi feature
+  spectrum_type: fbank #linear, mfcc, fbank
+  feat_dim: 80
+  delta_delta: False
+  dither: 1.0
+  target_sample_rate: 16000
+  max_freq: None
+  n_fft: None
+  stride_ms: 10.0
+  window_ms: 25.0
+  use_dB_normalization: True
+  target_dB: -20
+  random_seed: 0
+  keep_transcription_text: False
+  sortagrad: True 
+  shuffle_method: batch_shuffle
+  num_workers: 2
+
+
+
 training:
   n_epoch: 240
   accum_grad: 8
diff --git a/examples/librispeech/asr1/conf/chunk_transformer.yaml b/examples/librispeech/asr1/conf/chunk_transformer.yaml
index 366d6de0f..275e940af 100644
--- a/examples/librispeech/asr1/conf/chunk_transformer.yaml
+++ b/examples/librispeech/asr1/conf/chunk_transformer.yaml
@@ -1,41 +1,3 @@
-# https://yaml.org/type/float.html
-data:
-  train_manifest: data/manifest.train
-  dev_manifest: data/manifest.dev
-  test_manifest: data/manifest.test
-  min_input_len: 0.5  # second
-  max_input_len: 30.0 # second
-  min_output_len: 0.0 # tokens
-  max_output_len: 400.0 # tokens
-  min_output_input_ratio: 0.05
-  max_output_input_ratio: 100.0
-
-collator:
-  vocab_filepath: data/lang_char/vocab.txt 
-  unit_type: 'spm'
-  spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
-  mean_std_filepath: ""
-  augmentation_config: conf/preprocess.yaml
-  batch_size: 64
-  raw_wav: True  # use raw_wav or kaldi feature
-  spectrum_type: fbank #linear, mfcc, fbank
-  feat_dim: 80
-  delta_delta: False
-  dither: 1.0
-  target_sample_rate: 16000
-  max_freq: None
-  n_fft: None
-  stride_ms: 10.0
-  window_ms: 25.0
-  use_dB_normalization: True
-  target_dB: -20
-  random_seed: 0
-  keep_transcription_text: False
-  sortagrad: True 
-  shuffle_method: batch_shuffle
-  num_workers: 2
-
-
 # network architecture
 model:
     cmvn_file: 
@@ -73,6 +35,37 @@ model:
         length_normalized_loss: false
 
 
+data:
+  train_manifest: data/manifest.train
+  dev_manifest: data/manifest.dev
+  test_manifest: data/manifest.test
+
+collator:
+  vocab_filepath: data/lang_char/vocab.txt 
+  unit_type: 'spm'
+  spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
+  mean_std_filepath: ""
+  augmentation_config: conf/preprocess.yaml
+  batch_size: 64
+  raw_wav: True  # use raw_wav or kaldi feature
+  spectrum_type: fbank #linear, mfcc, fbank
+  feat_dim: 80
+  delta_delta: False
+  dither: 1.0
+  target_sample_rate: 16000
+  max_freq: None
+  n_fft: None
+  stride_ms: 10.0
+  window_ms: 25.0
+  use_dB_normalization: True
+  target_dB: -20
+  random_seed: 0
+  keep_transcription_text: False
+  sortagrad: True 
+  shuffle_method: batch_shuffle
+  num_workers: 2
+
+
 training:
   n_epoch: 120
   accum_grad: 1
diff --git a/examples/librispeech/asr1/conf/conformer.yaml b/examples/librispeech/asr1/conf/conformer.yaml
index f02f24dc6..1193f14b1 100644
--- a/examples/librispeech/asr1/conf/conformer.yaml
+++ b/examples/librispeech/asr1/conf/conformer.yaml
@@ -1,41 +1,3 @@
-# https://yaml.org/type/float.html
-data:
-  train_manifest: data/manifest.train
-  dev_manifest: data/manifest.dev
-  test_manifest: data/manifest.test-clean
-  min_input_len: 0.5  # seconds
-  max_input_len: 30.0 # seconds
-  min_output_len: 0.0 # tokens
-  max_output_len: 400.0 # tokens
-  min_output_input_ratio: 0.05
-  max_output_input_ratio: 100.0
-
-collator:
-  vocab_filepath: data/lang_char/vocab.txt 
-  unit_type: 'spm'
-  spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
-  mean_std_filepath: ""
-  augmentation_config: conf/preprocess.yaml
-  batch_size: 16
-  raw_wav: True  # use raw_wav or kaldi feature
-  spectrum_type: fbank #linear, mfcc, fbank
-  feat_dim: 80
-  delta_delta: False
-  dither: 1.0
-  target_sample_rate: 16000
-  max_freq: None
-  n_fft: None
-  stride_ms: 10.0
-  window_ms: 25.0
-  use_dB_normalization: True
-  target_dB: -20
-  random_seed: 0
-  keep_transcription_text: False
-  sortagrad: True 
-  shuffle_method: batch_shuffle
-  num_workers: 2
-
-
 # network architecture
 model:
     cmvn_file: 
@@ -76,8 +38,40 @@ model:
         length_normalized_loss: false
 
 
+data:
+  train_manifest: data/manifest.train
+  dev_manifest: data/manifest.dev
+  test_manifest: data/manifest.test-clean
+
+
+collator:
+  vocab_filepath: data/lang_char/vocab.txt 
+  unit_type: 'spm'
+  spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
+  mean_std_filepath: ""
+  augmentation_config: conf/preprocess.yaml
+  batch_size: 16
+  raw_wav: True  # use raw_wav or kaldi feature
+  spectrum_type: fbank #linear, mfcc, fbank
+  feat_dim: 80
+  delta_delta: False
+  dither: 1.0
+  target_sample_rate: 16000
+  max_freq: None
+  n_fft: None
+  stride_ms: 10.0
+  window_ms: 25.0
+  use_dB_normalization: True
+  target_dB: -20
+  random_seed: 0
+  keep_transcription_text: False
+  sortagrad: True 
+  shuffle_method: batch_shuffle
+  num_workers: 2
+
+
 training:
-  n_epoch: 120
+  n_epoch: 70
   accum_grad: 8
   global_grad_clip: 3.0
   optim: adam
@@ -98,13 +92,7 @@ decoding:
   batch_size: 64
   error_rate_type: wer
   decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
-  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
-  alpha: 2.5
-  beta: 0.3
   beam_size: 10
-  cutoff_prob: 1.0
-  cutoff_top_n: 0
-  num_proc_bsearch: 8
   ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
   decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
       # <0: for decoding, use full chunk.

From 51d7a07c6df7ddfd76d850ebad4713ae4d5ea125 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Tue, 14 Dec 2021 19:08:34 +0800
Subject: [PATCH 22/23] format and fix pre-commit (#1120)

---
 dataset/aidatatang_200zh/aidatatang_200zh.py   |  1 +
 dataset/aishell/aishell.py                     |  1 +
 dataset/librispeech/librispeech.py             |  1 +
 dataset/mini_librispeech/mini_librispeech.py   |  1 +
 dataset/musan/musan.py                         |  1 +
 dataset/rir_noise/rir_noise.py                 |  1 +
 dataset/thchs30/thchs30.py                     |  1 +
 dataset/timit/timit.py                         |  1 +
 dataset/voxforge/voxforge.py                   |  1 +
 docs/source/released_model.md                  |  1 -
 paddlespeech/cli/__init__.py                   |  2 +-
 paddlespeech/cls/exps/panns/deploy/predict.py  |  3 ++-
 paddlespeech/cls/exps/panns/export_model.py    |  2 +-
 paddlespeech/cls/exps/panns/predict.py         |  2 +-
 paddlespeech/cls/exps/panns/train.py           |  2 +-
 paddlespeech/cls/models/panns/panns.py         |  1 +
 paddlespeech/s2t/frontend/audio.py             |  7 ++++---
 paddlespeech/s2t/frontend/utility.py           |  6 ++++--
 paddlespeech/s2t/io/sampler.py                 |  4 ++--
 paddlespeech/s2t/models/ds2/__init__.py        |  4 ++--
 paddlespeech/s2t/models/ds2/deepspeech2.py     |  2 +-
 paddlespeech/s2t/models/ds2_online/__init__.py |  4 ++--
 paddlespeech/s2t/modules/ctc.py                |  2 +-
 paddlespeech/s2t/training/trainer.py           |  2 ++
 paddlespeech/s2t/transform/perturb.py          | 10 ++++++----
 utils/manifest_key_value.py                    |  1 +
 26 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/dataset/aidatatang_200zh/aidatatang_200zh.py b/dataset/aidatatang_200zh/aidatatang_200zh.py
index b8758c9a7..85f478c20 100644
--- a/dataset/aidatatang_200zh/aidatatang_200zh.py
+++ b/dataset/aidatatang_200zh/aidatatang_200zh.py
@@ -25,6 +25,7 @@ import os
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
 
diff --git a/dataset/aishell/aishell.py b/dataset/aishell/aishell.py
index 32dc119d2..7431fc083 100644
--- a/dataset/aishell/aishell.py
+++ b/dataset/aishell/aishell.py
@@ -25,6 +25,7 @@ import os
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
 
diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py
index 0c779696d..69f0db599 100644
--- a/dataset/librispeech/librispeech.py
+++ b/dataset/librispeech/librispeech.py
@@ -27,6 +27,7 @@ import os
 from multiprocessing.pool import Pool
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
 
diff --git a/dataset/mini_librispeech/mini_librispeech.py b/dataset/mini_librispeech/mini_librispeech.py
index d96b5d64d..730c73a8b 100644
--- a/dataset/mini_librispeech/mini_librispeech.py
+++ b/dataset/mini_librispeech/mini_librispeech.py
@@ -26,6 +26,7 @@ import os
 from multiprocessing.pool import Pool
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
 
diff --git a/dataset/musan/musan.py b/dataset/musan/musan.py
index dc237c30a..2ac701bed 100644
--- a/dataset/musan/musan.py
+++ b/dataset/musan/musan.py
@@ -28,6 +28,7 @@ import json
 import os
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
 
diff --git a/dataset/rir_noise/rir_noise.py b/dataset/rir_noise/rir_noise.py
index 0e055f17b..e7b122890 100644
--- a/dataset/rir_noise/rir_noise.py
+++ b/dataset/rir_noise/rir_noise.py
@@ -28,6 +28,7 @@ import json
 import os
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unzip
 
diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py
index 879ed58db..cdfc0a75c 100644
--- a/dataset/thchs30/thchs30.py
+++ b/dataset/thchs30/thchs30.py
@@ -26,6 +26,7 @@ from multiprocessing.pool import Pool
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
 
diff --git a/dataset/timit/timit.py b/dataset/timit/timit.py
index d03c48a1e..c4a9f0663 100644
--- a/dataset/timit/timit.py
+++ b/dataset/timit/timit.py
@@ -27,6 +27,7 @@ import string
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import unzip
 
 URL_ROOT = ""
diff --git a/dataset/voxforge/voxforge.py b/dataset/voxforge/voxforge.py
index c388f4491..373791bff 100644
--- a/dataset/voxforge/voxforge.py
+++ b/dataset/voxforge/voxforge.py
@@ -27,6 +27,7 @@ import shutil
 import subprocess
 
 import soundfile
+
 from utils.utility import download_multi
 from utils.utility import getfile_insensitive
 from utils.utility import unpack
diff --git a/docs/source/released_model.md b/docs/source/released_model.md
index 1b61ccc7b..91ef6d166 100644
--- a/docs/source/released_model.md
+++ b/docs/source/released_model.md
@@ -75,4 +75,3 @@ PANN | ESC-50 |[pann-esc50]("./examples/esc50/cls0")|[panns_cnn6.tar.gz](https:/
 | [Ds2 Offline Aishell model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz) |        Aishell Dataset         | Char-based  | 234 MB | 2 Conv + 3 bidirectional GRU layers                | 0.0804 | -      | 151 h           |
 | [Ds2 Offline Librispeech model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz) |      Librispeech Dataset       | Word-based  | 307 MB | 2 Conv + 3 bidirectional sharing weight RNN layers | -      | 0.0685 | 960 h           |
 | [Ds2 Offline Baidu en8k model](https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz) | Baidu Internal English Dataset | Word-based  | 273 MB | 2 Conv + 3 bidirectional GRU layers                | -      | 0.0541 | 8628 h          |
-
diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py
index c82168aee..80ca7a665 100644
--- a/paddlespeech/cli/__init__.py
+++ b/paddlespeech/cli/__init__.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import _locale
+
 from .asr import ASRExecutor
 from .base_commands import BaseCommand
 from .base_commands import HelpCommand
diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py
index ee566ed4f..d4e5c22fb 100644
--- a/paddlespeech/cls/exps/panns/deploy/predict.py
+++ b/paddlespeech/cls/exps/panns/deploy/predict.py
@@ -16,10 +16,11 @@ import os
 
 import numpy as np
 from paddle import inference
+from scipy.special import softmax
+
 from paddleaudio.backends import load as load_audio
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import melspectrogram
-from scipy.special import softmax
 
 # yapf: disable
 parser = argparse.ArgumentParser()
diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py
index 63b22981a..c295c6a33 100644
--- a/paddlespeech/cls/exps/panns/export_model.py
+++ b/paddlespeech/cls/exps/panns/export_model.py
@@ -15,8 +15,8 @@ import argparse
 import os
 
 import paddle
-from paddleaudio.datasets import ESC50
 
+from paddleaudio.datasets import ESC50
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
 
diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py
index 0a1b6cccf..9cfd8b6ce 100644
--- a/paddlespeech/cls/exps/panns/predict.py
+++ b/paddlespeech/cls/exps/panns/predict.py
@@ -16,11 +16,11 @@ import argparse
 import numpy as np
 import paddle
 import paddle.nn.functional as F
+
 from paddleaudio.backends import load as load_audio
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import LogMelSpectrogram
 from paddleaudio.features import melspectrogram
-
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
 
diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py
index 9508a977e..121309789 100644
--- a/paddlespeech/cls/exps/panns/train.py
+++ b/paddlespeech/cls/exps/panns/train.py
@@ -15,11 +15,11 @@ import argparse
 import os
 
 import paddle
+
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import LogMelSpectrogram
 from paddleaudio.utils import logger
 from paddleaudio.utils import Timer
-
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
 
diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py
index b442b2fd1..6d2dac56a 100644
--- a/paddlespeech/cls/models/panns/panns.py
+++ b/paddlespeech/cls/models/panns/panns.py
@@ -15,6 +15,7 @@ import os
 
 import paddle.nn as nn
 import paddle.nn.functional as F
+
 from paddleaudio.utils.download import load_state_dict_from_url
 from paddleaudio.utils.env import MODEL_HOME
 
diff --git a/paddlespeech/s2t/frontend/audio.py b/paddlespeech/s2t/frontend/audio.py
index d494cc4fd..d0368cc8d 100644
--- a/paddlespeech/s2t/frontend/audio.py
+++ b/paddlespeech/s2t/frontend/audio.py
@@ -356,7 +356,7 @@ class AudioSegment():
         # sox, slow
         try:
             import soxbindings as sox
-        except:
+        except ImportError:
             try:
                 from paddlespeech.s2t.utils import dynamic_pip_install
                 package = "sox"
@@ -364,8 +364,9 @@ class AudioSegment():
                 package = "soxbindings"
                 dynamic_pip_install.install(package)
                 import soxbindings as sox
-            except:
-                raise RuntimeError("Can not install soxbindings on your system." )
+            except Exception:
+                raise RuntimeError(
+                    "Can not install soxbindings on your system.")
 
         tfm = sox.Transformer()
         tfm.set_globals(multithread=False)
diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py
index e6c7603fa..d35785db6 100644
--- a/paddlespeech/s2t/frontend/utility.py
+++ b/paddlespeech/s2t/frontend/utility.py
@@ -102,9 +102,11 @@ def read_manifest(
     with jsonlines.open(manifest_path, 'r') as reader:
         for json_data in reader:
             feat_len = json_data["input"][0]["shape"][
-                0] if "input" in json_data and "shape" in json_data["input"][0] else 1.0
+                0] if "input" in json_data and "shape" in json_data["input"][
+                    0] else 1.0
             token_len = json_data["output"][0]["shape"][
-                0] if "output" in json_data and "shape" in json_data["output"][0] else 1.0
+                0] if "output" in json_data and "shape" in json_data["output"][
+                    0] else 1.0
             conditions = [
                 feat_len >= min_input_len,
                 feat_len <= max_input_len,
diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py
index 35b57524b..ac55af123 100644
--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@@ -20,13 +20,13 @@ from paddle.io import DistributedBatchSampler
 
 from paddlespeech.s2t.utils.log import Log
 
+logger = Log(__name__).getlog()
+
 __all__ = [
     "SortagradDistributedBatchSampler",
     "SortagradBatchSampler",
 ]
 
-logger = Log(__name__).getlog()
-
 
 def _batch_shuffle(indices, batch_size, epoch, clipped=False):
     """Put similarly-sized instances into minibatches for better efficiency
diff --git a/paddlespeech/s2t/models/ds2/__init__.py b/paddlespeech/s2t/models/ds2/__init__.py
index efa50863b..8d5959c8b 100644
--- a/paddlespeech/s2t/models/ds2/__init__.py
+++ b/paddlespeech/s2t/models/ds2/__init__.py
@@ -17,11 +17,11 @@ from paddlespeech.s2t.utils import dynamic_pip_install
 
 try:
     import swig_decoders
-except:
+except ImportError:
     try:
         package_name = 'paddlespeech_ctcdecoders'
         dynamic_pip_install.install(package_name)
-    except:
+    except Exception:
         raise RuntimeError(
             "Can not install package paddlespeech_ctcdecoders on your system. \
                 The DeepSpeech2 model is not supported for your system")
diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
index f0a553ec8..0dfaec29c 100644
--- a/paddlespeech/s2t/models/ds2/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -129,7 +129,7 @@ class DeepSpeech2Model(nn.Layer):
                 rnn_layer_size=1024,  #RNN layer size (number of RNN cells).
                 use_gru=True,  #Use gru if set True. Use simple rnn if set False.
                 share_rnn_weights=True,  #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.
-                ctc_grad_norm_type=None,))
+                ctc_grad_norm_type=None, ))
         if config is not None:
             config.merge_from_other_cfg(default)
         return default
diff --git a/paddlespeech/s2t/models/ds2_online/__init__.py b/paddlespeech/s2t/models/ds2_online/__init__.py
index 65ddd5122..2d304237b 100644
--- a/paddlespeech/s2t/models/ds2_online/__init__.py
+++ b/paddlespeech/s2t/models/ds2_online/__init__.py
@@ -17,11 +17,11 @@ from paddlespeech.s2t.utils import dynamic_pip_install
 
 try:
     import swig_decoders
-except:
+except ImportError:
     try:
         package_name = 'paddlespeech_ctcdecoders'
         dynamic_pip_install.install(package_name)
-    except:
+    except Exception:
         raise RuntimeError(
             "Can not install package paddlespeech_ctcdecoders on your system. \
                 The DeepSpeech2 model is not supported for your system")
diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py
index 774bcc62e..ffc9f0387 100644
--- a/paddlespeech/s2t/modules/ctc.py
+++ b/paddlespeech/s2t/modules/ctc.py
@@ -28,7 +28,7 @@ try:
     from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import ctc_beam_search_decoder_batch  # noqa: F401
     from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import ctc_greedy_decoder  # noqa: F401
     from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import Scorer  # noqa: F401
-except:
+except ImportError:
     try:
         from paddlespeech.s2t.utils import dynamic_pip_install
         package_name = 'paddlespeech_ctcdecoders'
diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py
index cc8f50317..9bf1ca4db 100644
--- a/paddlespeech/s2t/training/trainer.py
+++ b/paddlespeech/s2t/training/trainer.py
@@ -221,6 +221,8 @@ class Trainer():
         if hasattr(self.train_loader, "batch_sampler"):
             batch_sampler = self.train_loader.batch_sampler
             if isinstance(batch_sampler, paddle.io.DistributedBatchSampler):
+                logger.debug(
+                    f"train_loader.batch_sample set epoch: {self.epoch}")
                 batch_sampler.set_epoch(self.epoch)
 
     def before_train(self):
diff --git a/paddlespeech/s2t/transform/perturb.py b/paddlespeech/s2t/transform/perturb.py
index 90144197c..226885f36 100644
--- a/paddlespeech/s2t/transform/perturb.py
+++ b/paddlespeech/s2t/transform/perturb.py
@@ -147,7 +147,7 @@ class SpeedPerturbationSox():
 
         try:
             import soxbindings as sox
-        except:
+        except ImportError:
             try:
                 from paddlespeech.s2t.utils import dynamic_pip_install
                 package = "sox"
@@ -155,8 +155,10 @@ class SpeedPerturbationSox():
                 package = "soxbindings"
                 dynamic_pip_install.install(package)
                 import soxbindings as sox
-            except:
-                raise RuntimeError("Can not install soxbindings on your system." )
+            except Exception:
+                raise RuntimeError(
+                    "Can not install soxbindings on your system.")
+        self.sox = sox
 
         if utt2ratio is not None:
             self.utt2ratio = {}
@@ -200,7 +202,7 @@ class SpeedPerturbationSox():
         else:
             ratio = self.state.uniform(self.lower, self.upper)
 
-        tfm = sox.Transformer()
+        tfm = self.sox.Transformer()
         tfm.set_globals(multithread=False)
         tfm.speed(ratio)
         y = tfm.build_array(input_array=x, sample_rate_in=self.sr)
diff --git a/utils/manifest_key_value.py b/utils/manifest_key_value.py
index 3a8009039..fb3d3aaaf 100755
--- a/utils/manifest_key_value.py
+++ b/utils/manifest_key_value.py
@@ -5,6 +5,7 @@ import functools
 from pathlib import Path
 
 import jsonlines
+
 from utils.utility import add_arguments
 from utils.utility import print_arguments
 

From 17258d2332162dabf4546dfb5455854b106f147b Mon Sep 17 00:00:00 2001
From: Jackwaterveg <87408988+Jackwaterveg@users.noreply.github.com>
Date: Tue, 14 Dec 2021 19:13:09 +0800
Subject: [PATCH 23/23] Update install.md (#1117)

---
 docs/source/install.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/source/install.md b/docs/source/install.md
index 3bf477018..a976674d3 100644
--- a/docs/source/install.md
+++ b/docs/source/install.md
@@ -35,7 +35,14 @@ conda activate tools/venv
 ```
 Install  conda dependencies for `paddlespeech` :
 ```bash
-conda install -y -c conda-forge sox libsndfile swig bzip2 gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
+conda install -y -c conda-forge sox libsndfile swig bzip2
+```
+Do not forget to install `gcc` and `gxx` on your system.
+If you use Linux, you can use the script below to install them.
+
+(Hint: Do not use this script if you want to install the **Hard** way):
+```bash
+conda install -y -c conda-forge gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
 ```
 ### Install PaddlePaddle
 For example, for CUDA 10.2, CuDNN7.5 install paddle 2.2.0: