From a02654660a270478a4d405a312dce4e090d17a76 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:20:05 +0800 Subject: [PATCH 01/40] Update pretrained_models.py Add a new model for faster text process --- paddlespeech/resource/pretrained_models.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index f049879a3..0a1ed15e1 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -529,7 +529,7 @@ text_dynamic_pretrained_models = { 'ckpt/model_state.pdparams', 'vocab_file': 'punc_vocab.txt', - }, + } }, "ernie_linear_p3_wudao-punc-zh": { '1.0': { @@ -543,10 +543,26 @@ text_dynamic_pretrained_models = { 'ckpt/model_state.pdparams', 'vocab_file': 'punc_vocab.txt', - }, + } }, + "ernie_linear_p3_wudao_fast-punc-zh": { + '1.0':{ + 'url': + 'https://paddlespeech.bj.bcebos.com/text/ernie_linear_p3_wudao_fast-punc-zh.tar.gz', + 'md5': + 'c93f9594119541a5dbd763381a751d08', + 'cfg_path': + 'ckpt/model_config.json', + 'ckpt_path': + 'ckpt/model_state.pdparams', + 'vocab_file': + 'punc_vocab.txt', + } + } } + + # --------------------------------- # -------------- TTS -------------- # --------------------------------- From b627666ce9fde479793e492a063d6c977f12cf60 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:22:32 +0800 Subject: [PATCH 02/40] Update model_alias.py Add a new model for faster text process in cli --- paddlespeech/resource/model_alias.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddlespeech/resource/model_alias.py b/paddlespeech/resource/model_alias.py index 9c76dd4b3..85187a8d1 100644 --- a/paddlespeech/resource/model_alias.py +++ b/paddlespeech/resource/model_alias.py @@ -51,6 +51,10 @@ model_alias = { "paddlespeech.text.models:ErnieLinear", "paddlenlp.transformers:ErnieTokenizer" ], + "ernie_linear_p3_wudao": [ + "paddlespeech.text.models:ErnieLinear", + "paddlenlp.transformers:ErnieTokenizer" + ], # --------------------------------- # -------------- TTS -------------- From 57dcd0d17f559a5f22c83a0d321f4db9d57d08d9 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:29:10 +0800 Subject: [PATCH 03/40] Update infer.py change the infer in order to implement the new faster model for text --- paddlespeech/cli/text/infer.py | 91 ++++++++++++++++++++++++++++++---- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py index 24b8c9c25..ff822f674 100644 --- a/paddlespeech/cli/text/infer.py +++ b/paddlespeech/cli/text/infer.py @@ -20,10 +20,13 @@ from typing import Optional from typing import Union import paddle +import yaml +from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.text.models.ernie_linear import ErnieLinear __all__ = ['TextExecutor'] @@ -139,6 +142,66 @@ class TextExecutor(BaseExecutor): self.model.eval() + #init new models + def _init_from_path_new(self, + task: str='punc', + model_type: str='ernie_linear_p7_wudao', + lang: str='zh', + cfg_path: Optional[os.PathLike]=None, + ckpt_path: Optional[os.PathLike]=None, + vocab_file: Optional[os.PathLike]=None): + if hasattr(self, 'model'): + logger.debug('Model had been initialized.') + return + + 
self.task = task + + if cfg_path is None or ckpt_path is None or vocab_file is None: + tag = '-'.join([model_type, task, lang]) + self.task_resource.set_task_model(tag, version=None) + self.cfg_path = os.path.join( + self.task_resource.res_dir, + self.task_resource.res_dict['cfg_path']) + self.ckpt_path = os.path.join( + self.task_resource.res_dir, + self.task_resource.res_dict['ckpt_path']) + self.vocab_file = os.path.join( + self.task_resource.res_dir, + self.task_resource.res_dict['vocab_file']) + else: + self.cfg_path = os.path.abspath(cfg_path) + self.ckpt_path = os.path.abspath(ckpt_path) + self.vocab_file = os.path.abspath(vocab_file) + + model_name = model_type[:model_type.rindex('_')] + + if self.task == 'punc': + # punc list + self._punc_list = [] + with open(self.vocab_file, 'r') as f: + for line in f: + self._punc_list.append(line.strip()) + + # model + with open(self.cfg_path) as f: + config = CfgNode(yaml.safe_load(f)) + self.model = ErnieLinear(**config["model"]) + + _, tokenizer_class = self.task_resource.get_model_class(model_name) + state_dict = paddle.load(self.ckpt_path) + self.model.set_state_dict(state_dict["main_params"]) + self.model.eval() + + #tokenizer: fast version: ernie-3.0-mini-zh slow version:ernie-1.0 + if 'fast' not in model_type: + self.tokenizer = tokenizer_class.from_pretrained('ernie-1.0') + else: + self.tokenizer = tokenizer_class.from_pretrained( + 'ernie-3.0-mini-zh') + + else: + raise NotImplementedError + def _clean_text(self, text): text = text.lower() text = re.sub('[^A-Za-z0-9\u4e00-\u9fa5]', '', text) @@ -179,7 +242,7 @@ class TextExecutor(BaseExecutor): else: raise NotImplementedError - def postprocess(self) -> Union[str, os.PathLike]: + def postprocess(self, isNewTrainer: bool=False) -> Union[str, os.PathLike]: """ Output postprocess and return human-readable results such as texts and audio files. """ @@ -192,13 +255,13 @@ class TextExecutor(BaseExecutor): input_ids[1:seq_len - 1]) labels = preds[1:seq_len - 1].tolist() assert len(tokens) == len(labels) - + if isNewTrainer: + self._punc_list = [0] + self._punc_list text = '' for t, l in zip(tokens, labels): text += t if l != 0: # Non punc. text += self._punc_list[l] - return text else: raise NotImplementedError @@ -255,10 +318,20 @@ class TextExecutor(BaseExecutor): """ Python API to call an executor. """ - paddle.set_device(device) - self._init_from_path(task, model, lang, config, ckpt_path, punc_vocab) - self.preprocess(text) - self.infer() - res = self.postprocess() # Retrieve result of text task. - + #Here is old version models + if model in ['ernie_linear_p7_wudao', 'ernie_linear_p3_wudao']: + paddle.set_device(device) + self._init_from_path(task, model, lang, config, ckpt_path, + punc_vocab) + self.preprocess(text) + self.infer() + res = self.postprocess() # Retrieve result of text task. 
+ #Add new way to infer + else: + paddle.set_device(device) + self._init_from_path_new(task, model, lang, config, ckpt_path, + punc_vocab) + self.preprocess(text) + self.infer() + res = self.postprocess(isNewTrainer=True) return res From 92d09d5cce640300ac182852600217ac8796c34f Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:31:38 +0800 Subject: [PATCH 04/40] Update README_cn.md --- paddlespeech/cli/README_cn.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddlespeech/cli/README_cn.md b/paddlespeech/cli/README_cn.md index 4b15d6c7b..6464c598c 100644 --- a/paddlespeech/cli/README_cn.md +++ b/paddlespeech/cli/README_cn.md @@ -43,3 +43,7 @@ ```bash paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 ``` +- 快速标点恢复 + ```bash + paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast + ``` From fb7f04e021d495524878e79b9e12d675490e2e77 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:32:45 +0800 Subject: [PATCH 05/40] Update README.md --- paddlespeech/cli/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddlespeech/cli/README.md b/paddlespeech/cli/README.md index 19c822040..53c1ca3b2 100644 --- a/paddlespeech/cli/README.md +++ b/paddlespeech/cli/README.md @@ -42,3 +42,7 @@ ```bash paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 ``` +- Faster Punctuation Restoration + ```bash + paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast + ``` From 12a11394bd3f33f81e6a7e834c34993a2e1336d0 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:53:44 +0800 Subject: [PATCH 06/40] Update infer.py add a new faster model to infer in cli --- paddlespeech/cli/text/infer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py index ff822f674..8433e6545 100644 --- a/paddlespeech/cli/text/infer.py +++ b/paddlespeech/cli/text/infer.py @@ -335,3 +335,4 @@ class TextExecutor(BaseExecutor): self.infer() res = self.postprocess(isNewTrainer=True) return res + From a63a0b13503b3bf2d8b752973739a68d7e16780e Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:58:16 +0800 Subject: [PATCH 07/40] Update pretrained_models.py --- paddlespeech/resource/pretrained_models.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index 0a1ed15e1..b6ab7f01c 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -546,7 +546,7 @@ text_dynamic_pretrained_models = { } }, "ernie_linear_p3_wudao_fast-punc-zh": { - '1.0':{ + '1.0': { 'url': 'https://paddlespeech.bj.bcebos.com/text/ernie_linear_p3_wudao_fast-punc-zh.tar.gz', 'md5': @@ -561,8 +561,6 @@ text_dynamic_pretrained_models = { } } - - # --------------------------------- # -------------- TTS -------------- # --------------------------------- From 18b71dc1361030c47031e472f05c1664c79c4849 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 18:16:09 +0800 Subject: [PATCH 10/40] Update README.md --- paddlespeech/cli/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlespeech/cli/README.md 
b/paddlespeech/cli/README.md index 53c1ca3b2..1d10e0d79 100644 --- a/paddlespeech/cli/README.md +++ b/paddlespeech/cli/README.md @@ -45,4 +45,4 @@ - Faster Punctuation Restoration ```bash paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast - ``` + ``` From d5dec463365e6d000477b63a2d4d000d4d398b50 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 20 Sep 2022 18:22:41 +0800 Subject: [PATCH 11/40] Update README.md --- paddlespeech/cli/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlespeech/cli/README.md b/paddlespeech/cli/README.md index 1d10e0d79..e6e216c0b 100644 --- a/paddlespeech/cli/README.md +++ b/paddlespeech/cli/README.md @@ -43,6 +43,6 @@ paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 ``` - Faster Punctuation Restoration - ```bash + ```bash paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast - ``` + ``` From bdbacd42499b39aba3d013002989bbe44da3588f Mon Sep 17 00:00:00 2001 From: THUzyt21 Date: Tue, 20 Sep 2022 10:48:34 +0000 Subject: [PATCH 12/40] precomited --- paddlespeech/cli/text/infer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py index 8433e6545..ff822f674 100644 --- a/paddlespeech/cli/text/infer.py +++ b/paddlespeech/cli/text/infer.py @@ -335,4 +335,3 @@ class TextExecutor(BaseExecutor): self.infer() res = self.postprocess(isNewTrainer=True) return res - From 0cd01241dbcb03a7407902ab7b9cba91858aff17 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 15:33:13 +0800 Subject: [PATCH 13/40] Update test_cli.sh update about text cli --- tests/unit/cli/test_cli.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index 15604961d..c6837c303 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -7,7 +7,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav https://paddlespe paddlespeech cls --input ./cat.wav --topk 10 # Punctuation_restoration -paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 +paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast # Speech_recognition wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav From 9f8fbdbc09807a6b80416e846c3f7e394180df33 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 16:09:36 +0800 Subject: [PATCH 14/40] Update punc_application.yaml change model --- demos/streaming_asr_server/conf/punc_application.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/streaming_asr_server/conf/punc_application.yaml b/demos/streaming_asr_server/conf/punc_application.yaml index f947525e1..8456e2329 100644 --- a/demos/streaming_asr_server/conf/punc_application.yaml +++ b/demos/streaming_asr_server/conf/punc_application.yaml @@ -22,7 +22,7 @@ engine_list: ['text_python'] ################### text task: punc; engine_type: python ####################### text_python: task: punc - model_type: 'ernie_linear_p3_wudao' + model_type: 'ernie_linear_p3_wudao_fast' lang: 'zh' sample_rate: 16000 cfg_path: # [optional] From 82f731c1530c6e46470d4497073438ec6ab25d5b Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 
16:13:11 +0800 Subject: [PATCH 15/40] Update application.yaml change model --- paddlespeech/server/conf/application.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 55f241ec7..47b8b178f 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -142,7 +142,7 @@ cls_inference: ################### text task: punc; engine_type: python ####################### text_python: task: punc - model_type: 'ernie_linear_p3_wudao' + model_type: 'ernie_linear_p3_wudao_fast' lang: 'zh' sample_rate: 16000 cfg_path: # [optional] From d2da7f50d2982704dfb59184906cca96bff0c95b Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 16:27:49 +0800 Subject: [PATCH 16/40] Update text_engine.py precommihted already --- paddlespeech/server/engine/text/python/text_engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddlespeech/server/engine/text/python/text_engine.py b/paddlespeech/server/engine/text/python/text_engine.py index 6167e7784..9f2a48d51 100644 --- a/paddlespeech/server/engine/text/python/text_engine.py +++ b/paddlespeech/server/engine/text/python/text_engine.py @@ -107,10 +107,11 @@ class PaddleTextConnectionHandler: assert len(tokens) == len(labels) text = '' + print(self._punc_list) for t, l in zip(tokens, labels): text += t if l != 0: # Non punc. - text += self._punc_list[l] + text += self._punc_list[l - 1] return text else: @@ -160,7 +161,7 @@ class TextEngine(BaseEngine): return False self.executor = TextServerExecutor() - self.executor._init_from_path( + self.executor._init_from_path_new( task=config.task, model_type=config.model_type, lang=config.lang, From 83cd15be0c8077139baaaf82db63fb5ad9697c07 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:19:24 +0800 Subject: [PATCH 17/40] Create ernie-3.0.yaml config file of ernie-3.0-base-zh --- examples/iwslt2012/punc0/conf/ernie-3.0.yaml | 44 ++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/iwslt2012/punc0/conf/ernie-3.0.yaml diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0.yaml b/examples/iwslt2012/punc0/conf/ernie-3.0.yaml new file mode 100644 index 000000000..845b13fd8 --- /dev/null +++ b/examples/iwslt2012/punc0/conf/ernie-3.0.yaml @@ -0,0 +1,44 @@ +########################################################### +# DATA SETTING # +########################################################### +dataset_type: Ernie +train_path: data/iwslt2012_zh/train.txt +dev_path: data/iwslt2012_zh/dev.txt +test_path: data/iwslt2012_zh/test.txt +batch_size: 64 +num_workers: 2 +data_params: + pretrained_token: ernie-3.0-base-zh + punc_path: data/iwslt2012_zh/punc_vocab + seq_len: 100 + + +########################################################### +# MODEL SETTING # +########################################################### +model_type: ErnieLinear +model: + pretrained_token: ernie-3.0-base-zh + num_classes: 4 + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer_params: + weight_decay: 1.0e-6 # weight decay coefficient. + +scheduler_params: + learning_rate: 1.0e-5 # learning rate. + gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better. 
+ +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 20 +num_snapshots: 5 + +########################################################### +# OTHER SETTING # +########################################################### +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random From ae8076c7462b277cdb252f44dcb6e9616348fac3 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:21:20 +0800 Subject: [PATCH 18/40] Rename ernie-3.0.yaml to ernie-3.0-base.yaml --- .../iwslt2012/punc0/conf/{ernie-3.0.yaml => ernie-3.0-base.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/iwslt2012/punc0/conf/{ernie-3.0.yaml => ernie-3.0-base.yaml} (100%) diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0.yaml b/examples/iwslt2012/punc0/conf/ernie-3.0-base.yaml similarity index 100% rename from examples/iwslt2012/punc0/conf/ernie-3.0.yaml rename to examples/iwslt2012/punc0/conf/ernie-3.0-base.yaml From 7753a3bddc2f29ad5ac9cd14088c9db26aec0573 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:22:26 +0800 Subject: [PATCH 19/40] Create ernie-3.0-medium-zh config file of ernie-3.0-medium-zh --- .../iwslt2012/punc0/conf/ernie-3.0-medium-zh | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/iwslt2012/punc0/conf/ernie-3.0-medium-zh diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0-medium-zh b/examples/iwslt2012/punc0/conf/ernie-3.0-medium-zh new file mode 100644 index 000000000..392ba011c --- /dev/null +++ b/examples/iwslt2012/punc0/conf/ernie-3.0-medium-zh @@ -0,0 +1,44 @@ +########################################################### +# DATA SETTING # +########################################################### +dataset_type: Ernie +train_path: data/iwslt2012_zh/train.txt +dev_path: data/iwslt2012_zh/dev.txt +test_path: data/iwslt2012_zh/test.txt +batch_size: 64 +num_workers: 2 +data_params: + pretrained_token: ernie-3.0-medium-zh + punc_path: data/iwslt2012_zh/punc_vocab + seq_len: 100 + + +########################################################### +# MODEL SETTING # +########################################################### +model_type: ErnieLinear +model: + pretrained_token: ernie-3.0-medium-zh + num_classes: 4 + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer_params: + weight_decay: 1.0e-6 # weight decay coefficient. + +scheduler_params: + learning_rate: 1.0e-5 # learning rate. + gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better. 
+ +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 20 +num_snapshots: 5 + +########################################################### +# OTHER SETTING # +########################################################### +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random From ca780d7edccea488ef50d2c1cb52e0c29c98a7e3 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:22:47 +0800 Subject: [PATCH 20/40] Rename ernie-3.0-medium-zh to ernie-3.0-medium.yaml --- .../punc0/conf/{ernie-3.0-medium-zh => ernie-3.0-medium.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/iwslt2012/punc0/conf/{ernie-3.0-medium-zh => ernie-3.0-medium.yaml} (100%) diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0-medium-zh b/examples/iwslt2012/punc0/conf/ernie-3.0-medium.yaml similarity index 100% rename from examples/iwslt2012/punc0/conf/ernie-3.0-medium-zh rename to examples/iwslt2012/punc0/conf/ernie-3.0-medium.yaml From 83fd9589a1a047bbdaef12b94e40cdb9ece0b9b7 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:23:25 +0800 Subject: [PATCH 21/40] Create ernie-3.0-mini.yaml --- .../iwslt2012/punc0/conf/ernie-3.0-mini.yaml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/iwslt2012/punc0/conf/ernie-3.0-mini.yaml diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0-mini.yaml b/examples/iwslt2012/punc0/conf/ernie-3.0-mini.yaml new file mode 100644 index 000000000..c57fd94a8 --- /dev/null +++ b/examples/iwslt2012/punc0/conf/ernie-3.0-mini.yaml @@ -0,0 +1,44 @@ +########################################################### +# DATA SETTING # +########################################################### +dataset_type: Ernie +train_path: data/iwslt2012_zh/train.txt +dev_path: data/iwslt2012_zh/dev.txt +test_path: data/iwslt2012_zh/test.txt +batch_size: 64 +num_workers: 2 +data_params: + pretrained_token: ernie-3.0-mini-zh + punc_path: data/iwslt2012_zh/punc_vocab + seq_len: 100 + + +########################################################### +# MODEL SETTING # +########################################################### +model_type: ErnieLinear +model: + pretrained_token: ernie-3.0-mini-zh + num_classes: 4 + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer_params: + weight_decay: 1.0e-6 # weight decay coefficient. + +scheduler_params: + learning_rate: 1.0e-5 # learning rate. + gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better. 
+ +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 20 +num_snapshots: 5 + +########################################################### +# OTHER SETTING # +########################################################### +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random From 402770f1933e21df5be244e3d39e1c28aa7945e0 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:23:57 +0800 Subject: [PATCH 22/40] Create ernie-3.0-nano-zh --- .../iwslt2012/punc0/conf/ernie-3.0-nano-zh | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh b/examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh new file mode 100644 index 000000000..a7a84c4c1 --- /dev/null +++ b/examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh @@ -0,0 +1,44 @@ +########################################################### +# DATA SETTING # +########################################################### +dataset_type: Ernie +train_path: data/iwslt2012_zh/train.txt +dev_path: data/iwslt2012_zh/dev.txt +test_path: data/iwslt2012_zh/test.txt +batch_size: 64 +num_workers: 2 +data_params: + pretrained_token: ernie-3.0-nano-zh + punc_path: data/iwslt2012_zh/punc_vocab + seq_len: 100 + + +########################################################### +# MODEL SETTING # +########################################################### +model_type: ErnieLinear +model: + pretrained_token: ernie-3.0-nano-zh + num_classes: 4 + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer_params: + weight_decay: 1.0e-6 # weight decay coefficient. + +scheduler_params: + learning_rate: 1.0e-5 # learning rate. + gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better. 
+ +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 20 +num_snapshots: 5 + +########################################################### +# OTHER SETTING # +########################################################### +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random From d016584a3cabd48de3a59d180e7544b0bfcb512a Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:24:16 +0800 Subject: [PATCH 23/40] Rename ernie-3.0-nano-zh to ernie-3.0-nano-zh.yaml --- .../punc0/conf/{ernie-3.0-nano-zh => ernie-3.0-nano-zh.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/iwslt2012/punc0/conf/{ernie-3.0-nano-zh => ernie-3.0-nano-zh.yaml} (100%) diff --git a/examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh b/examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh.yaml similarity index 100% rename from examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh rename to examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh.yaml From bdf577b43af147c010218c9d6970a23e37835600 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:24:48 +0800 Subject: [PATCH 24/40] Create Ernie-tiny.yaml --- examples/iwslt2012/punc0/conf/Ernie-tiny.yaml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/iwslt2012/punc0/conf/Ernie-tiny.yaml diff --git a/examples/iwslt2012/punc0/conf/Ernie-tiny.yaml b/examples/iwslt2012/punc0/conf/Ernie-tiny.yaml new file mode 100644 index 000000000..6a5b7fee2 --- /dev/null +++ b/examples/iwslt2012/punc0/conf/Ernie-tiny.yaml @@ -0,0 +1,44 @@ +########################################################### +# DATA SETTING # +########################################################### +dataset_type: Ernie +train_path: data/iwslt2012_zh/train.txt +dev_path: data/iwslt2012_zh/dev.txt +test_path: data/iwslt2012_zh/test.txt +batch_size: 64 +num_workers: 2 +data_params: + pretrained_token: ernie-tiny + punc_path: data/iwslt2012_zh/punc_vocab + seq_len: 100 + + +########################################################### +# MODEL SETTING # +########################################################### +model_type: ErnieLinear +model: + pretrained_token: ernie-tiny + num_classes: 4 + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer_params: + weight_decay: 1.0e-6 # weight decay coefficient. + +scheduler_params: + learning_rate: 1.0e-5 # learning rate. + gamma: 0.9999 # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better. 
+ +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 20 +num_snapshots: 5 + +########################################################### +# OTHER SETTING # +########################################################### +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random From 4c70f71671ac75d57dd4eb499580c68e83a35360 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Tue, 27 Sep 2022 17:25:06 +0800 Subject: [PATCH 25/40] Rename Ernie-tiny.yaml to ernie-tiny.yaml --- .../iwslt2012/punc0/conf/{Ernie-tiny.yaml => ernie-tiny.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/iwslt2012/punc0/conf/{Ernie-tiny.yaml => ernie-tiny.yaml} (100%) diff --git a/examples/iwslt2012/punc0/conf/Ernie-tiny.yaml b/examples/iwslt2012/punc0/conf/ernie-tiny.yaml similarity index 100% rename from examples/iwslt2012/punc0/conf/Ernie-tiny.yaml rename to examples/iwslt2012/punc0/conf/ernie-tiny.yaml From ae90c51bd6a8ecb4d0b25759b86179ebb88e6cc2 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 27 Sep 2022 12:13:05 +0000 Subject: [PATCH 26/40] add Speaker Diarization in readme, test=doc --- README.md | 28 ++++++++++++++++++++++++++-- README_cn.md | 34 +++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 59c61f776..63466da84 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,6 @@

Quick Start - | Quick Start Server - | Quick Start Streaming Server | Documents | Models List | AIStudio Courses @@ -714,6 +712,31 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r + + +**Speaker Diarization** + + + + + + + + + + + + + + + + + + +
Task | Dataset | Model Type | Example
Speaker Diarization | AMI | ECAPA-TDNN + AHC / SC | ecapa-tdnn-ami
+ **Punctuation Restoration** @@ -767,6 +790,7 @@ Normally, [Speech SoTA](https://paperswithcode.com/area/speech), [Audio SoTA](ht - [Text-to-Speech](#TextToSpeech) - [Audio Classification](#AudioClassification) - [Speaker Verification](#SpeakerVerification) + - [Speaker Diarization](#SpeakerDiarization) - [Punctuation Restoration](#PunctuationRestoration) - [Community](#Community) - [Welcome to contribute](#contribution) diff --git a/README_cn.md b/README_cn.md index 070a656a2..2b473091f 100644 --- a/README_cn.md +++ b/README_cn.md @@ -19,10 +19,8 @@

- 安装 + 安装 | 快速开始 - | 快速使用服务 - | 快速使用流式服务 | 教程文档 | 模型列表 | AIStudio 课程 @@ -717,8 +715,8 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声 - Speaker Verification - VoxCeleb12 + 声纹识别 + VoxCeleb1/2 ECAPA-TDNN ecapa-tdnn-voxceleb12 @@ -727,6 +725,31 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声 + + +**说话人日志** + + + + + + + + + + + + + + + + + + +
任务 | 数据集 | 模型类型 | 脚本
说话人日志 | AMI | ECAPA-TDNN + AHC / SC | ecapa-tdnn-ami
+ **标点恢复** @@ -786,6 +809,7 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声 - [语音合成](#语音合成模型) - [声音分类](#声音分类模型) - [声纹识别](#声纹识别模型) + - [说话人日志](#说话人日志模型) - [标点恢复](#标点恢复模型) - [技术交流群](#技术交流群) - [欢迎贡献](#欢迎贡献) From 1e4f4dc5d35364ec4754c06c3cf58ddb7e25f042 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 27 Sep 2022 21:25:37 +0800 Subject: [PATCH 27/40] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 63466da84..ca4071109 100644 --- a/README.md +++ b/README.md @@ -703,7 +703,7 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r Speaker Verification - VoxCeleb12 + VoxCeleb1/2 ECAPA-TDNN ecapa-tdnn-voxceleb12 From 4e55c2067f54d2747a06712562fa2ca0eda48e07 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Wed, 28 Sep 2022 11:39:07 +0800 Subject: [PATCH 28/40] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ca4071109..d33287762 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,8 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision - 🧩 *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV). ### Recent Update +- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT in [PaddleSpeech Web Demo](./demos/speech_web). +- ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning with ECAPA-TDNN. - ⚡ 2022.08.25: Release TTS [finetune](./examples/other/tts_finetune/tts3) example. - 🔥 2022.08.22: Add ERNIE-SAT models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat). - 🔥 2022.08.15: Add [g2pW](https://github.com/GitYCC/g2pW) into TTS Chinese Text Frontend. From faa08085110bf4aacc8e4eab416635d7a69a2b05 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Wed, 28 Sep 2022 11:43:40 +0800 Subject: [PATCH 29/40] Update README_cn.md --- README_cn.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README_cn.md b/README_cn.md index 2b473091f..f3e176e33 100644 --- a/README_cn.md +++ b/README_cn.md @@ -179,6 +179,8 @@

### 近期更新 +- 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 ERNIE-SAT 到 [PaddleSpeech Web Demo](./demos/speech_web)。 +- ⚡ 2022.09.09: 新增基于 ECAPA-TDNN 声纹模型的 AISHELL-3 Voice Cloning [示例](./examples/aishell3/vc2)。 - ⚡ 2022.08.25: 发布 TTS [finetune](./examples/other/tts_finetune/tts3) 示例。 - 🔥 2022.08.22: 新增 ERNIE-SAT 模型: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat)。 - 🔥 2022.08.15: 将 [g2pW](https://github.com/GitYCC/g2pW) 引入 TTS 中文文本前端。 From 175f0e7ba71535cc5d59b42f8cfd5842b0f4eda9 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Wed, 28 Sep 2022 11:46:14 +0800 Subject: [PATCH 30/40] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d33287762..72db64b7d 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision ### Recent Update - 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT in [PaddleSpeech Web Demo](./demos/speech_web). -- ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning with ECAPA-TDNN. +- ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with ECAPA-TDNN speaker encoder. - ⚡ 2022.08.25: Release TTS [finetune](./examples/other/tts_finetune/tts3) example. - 🔥 2022.08.22: Add ERNIE-SAT models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat). - 🔥 2022.08.15: Add [g2pW](https://github.com/GitYCC/g2pW) into TTS Chinese Text Frontend. From 764fa0a8599a6b20c6f719b70bb45a3b4d52b245 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Wed, 28 Sep 2022 11:47:27 +0800 Subject: [PATCH 31/40] Update README_cn.md --- README_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_cn.md b/README_cn.md index f3e176e33..725f7eda1 100644 --- a/README_cn.md +++ b/README_cn.md @@ -179,7 +179,7 @@

### 近期更新 -- 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 ERNIE-SAT 到 [PaddleSpeech Web Demo](./demos/speech_web)。 +- 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 ERNIE-SAT 到 [PaddleSpeech 网页应用](./demos/speech_web)。 - ⚡ 2022.09.09: 新增基于 ECAPA-TDNN 声纹模型的 AISHELL-3 Voice Cloning [示例](./examples/aishell3/vc2)。 - ⚡ 2022.08.25: 发布 TTS [finetune](./examples/other/tts_finetune/tts3) 示例。 - 🔥 2022.08.22: 新增 ERNIE-SAT 模型: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat)。 From 8ecf6796f3673d2565ab2949c2e4d4f303c7c9ab Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Wed, 28 Sep 2022 15:23:49 +0800 Subject: [PATCH 32/40] Update text_engine.py --- paddlespeech/server/engine/text/python/text_engine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/paddlespeech/server/engine/text/python/text_engine.py b/paddlespeech/server/engine/text/python/text_engine.py index 9f2a48d51..b4ad95c64 100644 --- a/paddlespeech/server/engine/text/python/text_engine.py +++ b/paddlespeech/server/engine/text/python/text_engine.py @@ -107,7 +107,6 @@ class PaddleTextConnectionHandler: assert len(tokens) == len(labels) text = '' - print(self._punc_list) for t, l in zip(tokens, labels): text += t if l != 0: # Non punc. From 404708c64006dcff731204f9d9cbf7e616cdd7dc Mon Sep 17 00:00:00 2001 From: tianhao zhang <15600919271@163.com> Date: Wed, 28 Sep 2022 11:15:06 +0000 Subject: [PATCH 33/40] fix s2t gpu training hang --- examples/aishell/asr0/local/train.sh | 4 ++++ examples/aishell/asr1/local/train.sh | 4 ++++ examples/librispeech/asr0/local/train.sh | 4 ++++ examples/librispeech/asr1/local/train.sh | 4 ++++ examples/librispeech/asr2/local/train.sh | 4 ++++ examples/timit/asr1/local/train.sh | 4 ++++ examples/tiny/asr0/local/train.sh | 4 ++++ examples/tiny/asr1/local/train.sh | 4 ++++ examples/wenetspeech/asr1/local/train.sh | 4 ++++ 9 files changed, 36 insertions(+) diff --git a/examples/aishell/asr0/local/train.sh b/examples/aishell/asr0/local/train.sh index 256b30d22..2b71b7f76 100755 --- a/examples/aishell/asr0/local/train.sh +++ b/examples/aishell/asr0/local/train.sh @@ -26,6 +26,10 @@ if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True fi +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/aishell/asr1/local/train.sh b/examples/aishell/asr1/local/train.sh index f514de303..bfa8dd97d 100755 --- a/examples/aishell/asr1/local/train.sh +++ b/examples/aishell/asr1/local/train.sh @@ -35,6 +35,10 @@ echo ${ips_config} mkdir -p exp +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/librispeech/asr0/local/train.sh b/examples/librispeech/asr0/local/train.sh index 71659e28d..bb41fd554 100755 --- a/examples/librispeech/asr0/local/train.sh +++ b/examples/librispeech/asr0/local/train.sh @@ -26,6 +26,10 @@ if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True fi +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py 
\ --ngpu ${ngpu} \ diff --git a/examples/librispeech/asr1/local/train.sh b/examples/librispeech/asr1/local/train.sh index f729ed22c..e274b9133 100755 --- a/examples/librispeech/asr1/local/train.sh +++ b/examples/librispeech/asr1/local/train.sh @@ -29,6 +29,10 @@ fi # export FLAGS_cudnn_exhaustive_search=true # export FLAGS_conv_workspace_size_limit=4000 +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/librispeech/asr2/local/train.sh b/examples/librispeech/asr2/local/train.sh index 1f414ad41..c2f2d4b65 100755 --- a/examples/librispeech/asr2/local/train.sh +++ b/examples/librispeech/asr2/local/train.sh @@ -26,6 +26,10 @@ if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True fi +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/timit/asr1/local/train.sh b/examples/timit/asr1/local/train.sh index 661407582..1088c7ffa 100755 --- a/examples/timit/asr1/local/train.sh +++ b/examples/timit/asr1/local/train.sh @@ -19,6 +19,10 @@ if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True fi +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/tiny/asr0/local/train.sh b/examples/tiny/asr0/local/train.sh index 8b67902fe..e233a0c0a 100755 --- a/examples/tiny/asr0/local/train.sh +++ b/examples/tiny/asr0/local/train.sh @@ -32,6 +32,10 @@ fi mkdir -p exp +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/tiny/asr1/local/train.sh b/examples/tiny/asr1/local/train.sh index 459f2e218..fbfb41f6f 100755 --- a/examples/tiny/asr1/local/train.sh +++ b/examples/tiny/asr1/local/train.sh @@ -34,6 +34,10 @@ fi mkdir -p exp +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ diff --git a/examples/wenetspeech/asr1/local/train.sh b/examples/wenetspeech/asr1/local/train.sh index 01af00b61..6813d270c 100755 --- a/examples/wenetspeech/asr1/local/train.sh +++ b/examples/wenetspeech/asr1/local/train.sh @@ -35,6 +35,10 @@ echo ${ips_config} mkdir -p exp +# default memeory allocator strategy may case gpu training hang +# for no OOM raised when memory exhaused +export FLAGS_allocator_strategy=naive_best_fit + if [ ${ngpu} == 0 ]; then python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ From b9693a0e8e41636cdc1c141467a4fbee621119b7 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Thu, 29 Sep 2022 13:08:20 +0800 Subject: [PATCH 34/40] Update text_engine.py --- paddlespeech/server/engine/text/python/text_engine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/paddlespeech/server/engine/text/python/text_engine.py b/paddlespeech/server/engine/text/python/text_engine.py index b4ad95c64..a871de35c 
100644 --- a/paddlespeech/server/engine/text/python/text_engine.py +++ b/paddlespeech/server/engine/text/python/text_engine.py @@ -131,7 +131,6 @@ class TextEngine(BaseEngine): """ super(TextEngine, self).__init__() logger.debug("Create the TextEngine Instance") - def init(self, config: dict): """Init the Text Engine From 8c945c073d6764b20b0ccad7b4cf5f00c1180bd6 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:05:49 +0800 Subject: [PATCH 35/40] Update application.yaml --- paddlespeech/server/conf/application.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 47b8b178f..55f241ec7 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -142,7 +142,7 @@ cls_inference: ################### text task: punc; engine_type: python ####################### text_python: task: punc - model_type: 'ernie_linear_p3_wudao_fast' + model_type: 'ernie_linear_p3_wudao' lang: 'zh' sample_rate: 16000 cfg_path: # [optional] From 80837fd65812ddb64ce17c813ac1b05f27571458 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:06:57 +0800 Subject: [PATCH 36/40] Update punc_application.yaml --- demos/streaming_asr_server/conf/punc_application.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/streaming_asr_server/conf/punc_application.yaml b/demos/streaming_asr_server/conf/punc_application.yaml index 8456e2329..f947525e1 100644 --- a/demos/streaming_asr_server/conf/punc_application.yaml +++ b/demos/streaming_asr_server/conf/punc_application.yaml @@ -22,7 +22,7 @@ engine_list: ['text_python'] ################### text task: punc; engine_type: python ####################### text_python: task: punc - model_type: 'ernie_linear_p3_wudao_fast' + model_type: 'ernie_linear_p3_wudao' lang: 'zh' sample_rate: 16000 cfg_path: # [optional] From 304dc2603c583cda7d1bffb6f7d14eb7c40f96d0 Mon Sep 17 00:00:00 2001 From: Zhao Yuting <91456992+THUzyt21@users.noreply.github.com> Date: Thu, 29 Sep 2022 14:08:37 +0800 Subject: [PATCH 37/40] Update text_engine.py --- .../server/engine/text/python/text_engine.py | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/paddlespeech/server/engine/text/python/text_engine.py b/paddlespeech/server/engine/text/python/text_engine.py index a871de35c..cc72c0543 100644 --- a/paddlespeech/server/engine/text/python/text_engine.py +++ b/paddlespeech/server/engine/text/python/text_engine.py @@ -107,11 +107,14 @@ class PaddleTextConnectionHandler: assert len(tokens) == len(labels) text = '' + is_fast_model = 'fast' in self.text_engine.config.model_type for t, l in zip(tokens, labels): text += t if l != 0: # Non punc. 
- text += self._punc_list[l - 1] - + if is_fast_model: + text += self._punc_list[l - 1] + else: + text += self._punc_list[l] return text else: raise NotImplementedError @@ -131,6 +134,7 @@ class TextEngine(BaseEngine): """ super(TextEngine, self).__init__() logger.debug("Create the TextEngine Instance") + def init(self, config: dict): """Init the Text Engine @@ -159,14 +163,23 @@ class TextEngine(BaseEngine): return False self.executor = TextServerExecutor() - self.executor._init_from_path_new( - task=config.task, - model_type=config.model_type, - lang=config.lang, - cfg_path=config.cfg_path, - ckpt_path=config.ckpt_path, - vocab_file=config.vocab_file) - + if 'fast' in config.model_type: + self.executor._init_from_path_new( + task=config.task, + model_type=config.model_type, + lang=config.lang, + cfg_path=config.cfg_path, + ckpt_path=config.ckpt_path, + vocab_file=config.vocab_file) + else: + self.executor._init_from_path( + task=config.task, + model_type=config.model_type, + lang=config.lang, + cfg_path=config.cfg_path, + ckpt_path=config.ckpt_path, + vocab_file=config.vocab_file) + logger.info("Using model: %s." % (config.model_type)) logger.info("Initialize Text server engine successfully on device: %s." % (self.device)) return True From 5bbe6e9897f7112fec0d06b08714fc26bde20ec5 Mon Sep 17 00:00:00 2001 From: tianhao zhang <15600919271@163.com> Date: Thu, 29 Sep 2022 13:41:16 +0000 Subject: [PATCH 38/40] support u2pp cli and server, optimiz code of u2pp decode, test=asr --- .../conf/application.yaml | 2 +- docs/source/released_model.md | 1 + paddlespeech/cli/asr/infer.py | 4 +- paddlespeech/resource/model_alias.py | 2 + paddlespeech/resource/pretrained_models.py | 40 +++++++++++++++++++ paddlespeech/s2t/exps/u2/bin/test_wav.py | 4 +- paddlespeech/s2t/exps/u2/model.py | 4 +- paddlespeech/s2t/models/u2/u2.py | 33 +++++++-------- .../server/conf/ws_conformer_application.yaml | 2 +- .../engine/asr/online/python/asr_engine.py | 23 +++++++++-- 10 files changed, 83 insertions(+), 32 deletions(-) diff --git a/demos/streaming_asr_server/conf/application.yaml b/demos/streaming_asr_server/conf/application.yaml index a89d312ab..d446e13b6 100644 --- a/demos/streaming_asr_server/conf/application.yaml +++ b/demos/streaming_asr_server/conf/application.yaml @@ -21,7 +21,7 @@ engine_list: ['asr_online'] ################################### ASR ######################################### ################### speech task: asr; engine_type: online ####################### asr_online: - model_type: 'conformer_online_wenetspeech' + model_type: 'conformer_u2pp_online_wenetspeech' am_model: # the pdmodel file of am static model [optional] am_params: # the pdiparams file of am static model [optional] lang: 'zh' diff --git a/docs/source/released_model.md b/docs/source/released_model.md index d6691812e..bdac2c5bb 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -9,6 +9,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | [Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) | onnx/inference/python | [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz)| Aishell Dataset | Char-based | 1.4 GB | 2 Conv + 5 bidirectional LSTM layers| 
0.0554 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) | inference/python | [Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 0.1879 (test\_meeting) |-| 10000 h |- | python | +[Conformer U2PP Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.1.model.tar.gz) | WenetSpeech Dataset | Char-based | 476 MB | Encoder:Conformer, Decoder:BiTransformer, Decoding method: Attention rescoring| 0.047198 (aishell test\_-1) 0.059212 (aishell test\_16) |-| 10000 h |- | python | [Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.0544 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) | python | [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_1.0.1.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0460 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) | python | [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) | python | diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 7296776f9..4a7feaf0f 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -51,7 +51,7 @@ class ASRExecutor(BaseExecutor): self.parser.add_argument( '--model', type=str, - default='conformer_wenetspeech', + default='conformer_u2pp_wenetspeech', choices=[ tag[:tag.index('-')] for tag in self.task_resource.pretrained_models.keys() @@ -465,7 +465,7 @@ class ASRExecutor(BaseExecutor): @stats_wrapper def __call__(self, audio_file: os.PathLike, - model: str='conformer_wenetspeech', + model: str='conformer_u2pp_wenetspeech', lang: str='zh', sample_rate: int=16000, config: os.PathLike=None, diff --git a/paddlespeech/resource/model_alias.py b/paddlespeech/resource/model_alias.py index 9c76dd4b3..3f36f11f2 100644 --- a/paddlespeech/resource/model_alias.py +++ b/paddlespeech/resource/model_alias.py @@ -25,6 +25,8 @@ model_alias = { "deepspeech2online": ["paddlespeech.s2t.models.ds2:DeepSpeech2Model"], "conformer": ["paddlespeech.s2t.models.u2:U2Model"], "conformer_online": ["paddlespeech.s2t.models.u2:U2Model"], + "conformer_u2pp": ["paddlespeech.s2t.models.u2:U2Model"], + "conformer_u2pp_online": ["paddlespeech.s2t.models.u2:U2Model"], "transformer": ["paddlespeech.s2t.models.u2:U2Model"], "wenetspeech": ["paddlespeech.s2t.models.u2:U2Model"], diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index f049879a3..eecf21768 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -68,6 +68,46 @@ asr_dynamic_pretrained_models = { '', }, }, + 
"conformer_u2pp_wenetspeech-zh-16k": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.1.model.tar.gz', + 'md5': + 'eae678c04ed3b3f89672052fdc0c5e10', + 'cfg_path': + 'model.yaml', + 'ckpt_path': + 'exp/chunk_conformer_u2pp/checkpoints/avg_10', + 'model': + 'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams', + 'params': + 'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams', + 'lm_url': + '', + 'lm_md5': + '', + }, + }, + "conformer_u2pp_online_wenetspeech-zh-16k": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.2.model.tar.gz', + 'md5': + '925d047e9188dea7f421a718230c9ae3', + 'cfg_path': + 'model.yaml', + 'ckpt_path': + 'exp/chunk_conformer_u2pp/checkpoints/avg_10', + 'model': + 'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams', + 'params': + 'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams', + 'lm_url': + '', + 'lm_md5': + '', + }, + }, "conformer_online_multicn-zh-16k": { '1.0': { 'url': diff --git a/paddlespeech/s2t/exps/u2/bin/test_wav.py b/paddlespeech/s2t/exps/u2/bin/test_wav.py index 4588def0b..46925faed 100644 --- a/paddlespeech/s2t/exps/u2/bin/test_wav.py +++ b/paddlespeech/s2t/exps/u2/bin/test_wav.py @@ -40,7 +40,6 @@ class U2Infer(): self.preprocess_conf = config.preprocess_config self.preprocess_args = {"train": False} self.preprocessing = Transformation(self.preprocess_conf) - self.reverse_weight = getattr(config.model_conf, 'reverse_weight', 0.0) self.text_feature = TextFeaturizer( unit_type=config.unit_type, vocab=config.vocab_filepath, @@ -89,8 +88,7 @@ class U2Infer(): ctc_weight=decode_config.ctc_weight, decoding_chunk_size=decode_config.decoding_chunk_size, num_decoding_left_chunks=decode_config.num_decoding_left_chunks, - simulate_streaming=decode_config.simulate_streaming, - reverse_weight=self.reverse_weight) + simulate_streaming=decode_config.simulate_streaming) rsl = result_transcripts[0][0] utt = Path(self.audio_file).name logger.info(f"hyp: {utt} {result_transcripts[0][0]}") diff --git a/paddlespeech/s2t/exps/u2/model.py b/paddlespeech/s2t/exps/u2/model.py index a13a6385e..a6197d073 100644 --- a/paddlespeech/s2t/exps/u2/model.py +++ b/paddlespeech/s2t/exps/u2/model.py @@ -316,7 +316,6 @@ class U2Tester(U2Trainer): vocab=self.config.vocab_filepath, spm_model_prefix=self.config.spm_model_prefix) self.vocab_list = self.text_feature.vocab_list - self.reverse_weight = getattr(config.model_conf, 'reverse_weight', 0.0) def id2token(self, texts, texts_len, text_feature): """ ord() id to chr() chr """ @@ -351,8 +350,7 @@ class U2Tester(U2Trainer): ctc_weight=decode_config.ctc_weight, decoding_chunk_size=decode_config.decoding_chunk_size, num_decoding_left_chunks=decode_config.num_decoding_left_chunks, - simulate_streaming=decode_config.simulate_streaming, - reverse_weight=self.reverse_weight) + simulate_streaming=decode_config.simulate_streaming) decode_time = time.time() - start_time for utt, target, result, rec_tids in zip( diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 0a3e03b79..53c3bf555 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -507,16 +507,14 @@ class U2BaseModel(ASRInterface, nn.Layer): num_decoding_left_chunks, simulate_streaming) return hyps[0][0] - def attention_rescoring( - self, - speech: paddle.Tensor, - speech_lengths: paddle.Tensor, - beam_size: int, - decoding_chunk_size: int=-1, - 
num_decoding_left_chunks: int=-1, - ctc_weight: float=0.0, - simulate_streaming: bool=False, - reverse_weight: float=0.0, ) -> List[int]: + def attention_rescoring(self, + speech: paddle.Tensor, + speech_lengths: paddle.Tensor, + beam_size: int, + decoding_chunk_size: int=-1, + num_decoding_left_chunks: int=-1, + ctc_weight: float=0.0, + simulate_streaming: bool=False) -> List[int]: """ Apply attention rescoring decoding, CTC prefix beam search is applied first to get nbest, then we resoring the nbest on attention decoder with corresponding encoder out @@ -536,7 +534,7 @@ class U2BaseModel(ASRInterface, nn.Layer): """ assert speech.shape[0] == speech_lengths.shape[0] assert decoding_chunk_size != 0 - if reverse_weight > 0.0: + if self.reverse_weight > 0.0: # decoder should be a bitransformer decoder if reverse_weight > 0.0 assert hasattr(self.decoder, 'right_decoder') device = speech.place @@ -574,7 +572,7 @@ class U2BaseModel(ASRInterface, nn.Layer): self.eos) decoder_out, r_decoder_out, _ = self.decoder( encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, - reverse_weight) # (beam_size, max_hyps_len, vocab_size) + self.reverse_weight) # (beam_size, max_hyps_len, vocab_size) # ctc score in ln domain decoder_out = paddle.nn.functional.log_softmax(decoder_out, axis=-1) decoder_out = decoder_out.numpy() @@ -594,12 +592,13 @@ class U2BaseModel(ASRInterface, nn.Layer): score += decoder_out[i][j][w] # last decoder output token is `eos`, for laste decoder input token. score += decoder_out[i][len(hyp[0])][self.eos] - if reverse_weight > 0: + if self.reverse_weight > 0: r_score = 0.0 for j, w in enumerate(hyp[0]): r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w] r_score += r_decoder_out[i][len(hyp[0])][self.eos] - score = score * (1 - reverse_weight) + r_score * reverse_weight + score = score * (1 - self.reverse_weight + ) + r_score * self.reverse_weight # add ctc score (which in ln domain) score += hyp[1] * ctc_weight if score > best_score: @@ -748,8 +747,7 @@ class U2BaseModel(ASRInterface, nn.Layer): ctc_weight: float=0.0, decoding_chunk_size: int=-1, num_decoding_left_chunks: int=-1, - simulate_streaming: bool=False, - reverse_weight: float=0.0): + simulate_streaming: bool=False): """u2 decoding. 
Args: @@ -821,8 +819,7 @@ class U2BaseModel(ASRInterface, nn.Layer): decoding_chunk_size=decoding_chunk_size, num_decoding_left_chunks=num_decoding_left_chunks, ctc_weight=ctc_weight, - simulate_streaming=simulate_streaming, - reverse_weight=reverse_weight) + simulate_streaming=simulate_streaming) hyps = [hyp] else: raise ValueError(f"Not support decoding method: {decoding_method}") diff --git a/paddlespeech/server/conf/ws_conformer_application.yaml b/paddlespeech/server/conf/ws_conformer_application.yaml index d72eb2379..b6128118f 100644 --- a/paddlespeech/server/conf/ws_conformer_application.yaml +++ b/paddlespeech/server/conf/ws_conformer_application.yaml @@ -30,7 +30,7 @@ asr_online: decode_method: num_decoding_left_chunks: -1 force_yes: True - device: # cpu or gpu:id + device: gpu # cpu or gpu:id continuous_decoding: True # enable continue decoding when endpoint detected am_predictor_conf: diff --git a/paddlespeech/server/engine/asr/online/python/asr_engine.py b/paddlespeech/server/engine/asr/online/python/asr_engine.py index 4c7c4b37a..740f5270d 100644 --- a/paddlespeech/server/engine/asr/online/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/online/python/asr_engine.py @@ -22,6 +22,7 @@ from numpy import float32 from yacs.config import CfgNode from paddlespeech.audio.transform.transformation import Transformation +from paddlespeech.audio.utils.tensor_utils import st_reverse_pad_list from paddlespeech.cli.asr.infer import ASRExecutor from paddlespeech.cli.log import logger from paddlespeech.resource import CommonTaskResource @@ -603,24 +604,31 @@ class PaddleASRConnectionHanddler: hyps_pad = pad_sequence( hyp_list, batch_first=True, padding_value=self.model.ignore_id) + ori_hyps_pad = hyps_pad hyps_lens = paddle.to_tensor( [len(hyp[0]) for hyp in hyps], place=self.device, dtype=paddle.long) # (beam_size,) hyps_pad, _ = add_sos_eos(hyps_pad, self.model.sos, self.model.eos, self.model.ignore_id) hyps_lens = hyps_lens + 1 # Add at begining - encoder_out = self.encoder_out.repeat(beam_size, 1, 1) encoder_mask = paddle.ones( (beam_size, 1, encoder_out.shape[1]), dtype=paddle.bool) - decoder_out, _, _ = self.model.decoder( - encoder_out, encoder_mask, hyps_pad, - hyps_lens) # (beam_size, max_hyps_len, vocab_size) + r_hyps_pad = st_reverse_pad_list(ori_hyps_pad, hyps_lens - 1, + self.model.sos, self.model.eos) + decoder_out, r_decoder_out, _ = self.model.decoder( + encoder_out, encoder_mask, hyps_pad, hyps_lens, r_hyps_pad, + self.model.reverse_weight) # (beam_size, max_hyps_len, vocab_size) # ctc score in ln domain decoder_out = paddle.nn.functional.log_softmax(decoder_out, axis=-1) decoder_out = decoder_out.numpy() + # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a + # conventional transformer decoder. + r_decoder_out = paddle.nn.functional.log_softmax(r_decoder_out, axis=-1) + r_decoder_out = r_decoder_out.numpy() + # Only use decoder score for rescoring best_score = -float('inf') best_index = 0 @@ -632,6 +640,13 @@ class PaddleASRConnectionHanddler: # last decoder output token is `eos`, for laste decoder input token. 
             score += decoder_out[i][len(hyp[0])][self.model.eos]
+            if self.model.reverse_weight > 0:
+                r_score = 0.0
+                for j, w in enumerate(hyp[0]):
+                    r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w]
+                r_score += r_decoder_out[i][len(hyp[0])][self.model.eos]
+                score = score * (1 - self.model.reverse_weight
+                                 ) + r_score * self.model.reverse_weight
             # add ctc score (which in ln domain)
             score += hyp[1] * self.ctc_decode_config.ctc_weight

From 7a13b35fe6cec02b27ab9eb05e0ed47ef767a17b Mon Sep 17 00:00:00 2001
From: ZapBird <105480550+ZapBird@users.noreply.github.com>
Date: Fri, 30 Sep 2022 10:45:43 +0800
Subject: [PATCH 39/40] Seek BytesIO inputs back to the start so that they can
 be read multiple times, e.g. in __call__ (#2484)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* When the audio input is a BytesIO object, it must be rewound to the initial
  position so that repeated reads work, e.g. in the __call__ function.

  When the audio_file argument of __call__ is a BytesIO object, execution
  fails at self.preprocess(model, audio_file); audio_file.seek(0) has to be
  called whenever audio_file is a BytesIO object.
---
 paddlespeech/cli/asr/infer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index 7296776f9..0c794a001 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
+import io
 import os
 import sys
 import time
@@ -229,6 +230,8 @@ class ASRExecutor(BaseExecutor):
         audio_file = input
         if isinstance(audio_file, (str, os.PathLike)):
             logger.debug("Preprocess audio_file:" + audio_file)
+        elif isinstance(audio_file, io.BytesIO):
+            audio_file.seek(0)

         # Get the object for feature extraction
         if "deepspeech2" in model_type or "conformer" in model_type or "transformer" in model_type:
@@ -352,6 +355,8 @@ class ASRExecutor(BaseExecutor):
             if not os.path.isfile(audio_file):
                 logger.error("Please input the right audio file path")
                 return False
+        elif isinstance(audio_file, io.BytesIO):
+            audio_file.seek(0)

         logger.debug("checking the audio file format......")
         try:

From 5b5167b58635c879da2ef36fa4283d99c321d6ce Mon Sep 17 00:00:00 2001
From: tianhao zhang <15600919271@163.com>
Date: Fri, 30 Sep 2022 04:14:22 +0000
Subject: [PATCH 40/40] support u2pp cli and server, optimize code of u2pp
 decode, test=asr

---
 paddlespeech/resource/pretrained_models.py             | 4 ++--
 paddlespeech/server/conf/ws_conformer_application.yaml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py
index eecf21768..d012a7d2d 100644
--- a/paddlespeech/resource/pretrained_models.py
+++ b/paddlespeech/resource/pretrained_models.py
@@ -69,7 +69,7 @@ asr_dynamic_pretrained_models = {
         },
     },
     "conformer_u2pp_wenetspeech-zh-16k": {
-        '1.0': {
+        '1.1': {
             'url':
             'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.1.model.tar.gz',
             'md5':
@@ -89,7 +89,7 @@ asr_dynamic_pretrained_models = {
         },
     },
     "conformer_u2pp_online_wenetspeech-zh-16k": {
-        '1.0': {
+        '1.1': {
             'url':
             'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.2.model.tar.gz',
             'md5':
diff --git a/paddlespeech/server/conf/ws_conformer_application.yaml b/paddlespeech/server/conf/ws_conformer_application.yaml
index b6128118f..d5357c853 100644
--- a/paddlespeech/server/conf/ws_conformer_application.yaml
+++ b/paddlespeech/server/conf/ws_conformer_application.yaml
@@ -30,7 +30,7 @@ asr_online:
     decode_method:
     num_decoding_left_chunks: -1
     force_yes: True
-    device: gpu # cpu or gpu:id
+    device: cpu # cpu or gpu:id
     continuous_decoding: True  # enable continue decoding when endpoint detected

     am_predictor_conf:
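
Usage note: a minimal Python sketch of the behaviour introduced above, combining the BytesIO rewind from PATCH 39 with the u2++ streaming model registered in PATCH 40. The file name zh.wav, the sample rate, and the keyword arguments are illustrative assumptions, not part of the patches; check the ASRExecutor signature of the installed paddlespeech version before relying on them.

import io

from paddlespeech.cli.asr.infer import ASRExecutor

asr = ASRExecutor()

# Read the audio into an in-memory buffer once (assumes a local 16 kHz WAV).
with open("zh.wav", "rb") as f:
    buf = io.BytesIO(f.read())

# Decode the same buffer twice: PATCH 39 makes the executor call seek(0) on
# BytesIO inputs, so the second pass reads the data from the beginning again.
# The model name is the u2++ streaming entry exposed to the CLI in PATCH 40.
for _ in range(2):
    result = asr(
        audio_file=buf,
        model="conformer_u2pp_online_wenetspeech",
        lang="zh",
        sample_rate=16000,
        force_yes=True)
    print(result)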