diff --git a/README.md b/README.md index 9d7ed4258..8e338fdee 100644 --- a/README.md +++ b/README.md @@ -157,13 +157,16 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision - 🧩 *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV). ### Recent Update +- 🎉 2022.11.17: Add [male voice for TTS](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660). +- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech). +- 👑 2022.11.01: Add [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) for [Chinese English mixed TTS](./examples/zh_en_tts/tts3). - 🔥 2022.10.26: Add [Prosody Prediction](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy) for TTS. - 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend. - 👑 2022.10.11: Add [Wav2vec2ASR](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech. -- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT in [PaddleSpeech Web Demo](./demos/speech_web). +- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and [ERNIE-SAT](https://arxiv.org/abs/2211.03545) in [PaddleSpeech Web Demo](./demos/speech_web). - ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with ECAPA-TDNN speaker encoder. - ⚡ 2022.08.25: Release TTS [finetune](./examples/other/tts_finetune/tts3) example. -- 🔥 2022.08.22: Add ERNIE-SAT models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat). +- 🔥 2022.08.22: Add [ERNIE-SAT](https://arxiv.org/abs/2211.03545) models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat). - 🔥 2022.08.15: Add [g2pW](https://github.com/GitYCC/g2pW) into TTS Chinese Text Frontend. - 🔥 2022.08.09: Release [Chinese English mixed TTS](./examples/zh_en_tts/tts3). - ⚡ 2022.08.03: Add ONNXRuntime infer for TTS CLI. @@ -578,7 +581,7 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r - ERNIE-SAT + ERNIE-SAT VCTK / AISHELL-3 / ZH_EN ERNIE-SAT-vctk / ERNIE-SAT-aishell3 / ERNIE-SAT-zh_en @@ -716,9 +719,9 @@ PaddleSpeech supports a series of most popular models. 
They are summarized in [r Keyword Spotting hey-snips - PANN + MDTC - pann-hey-snips + mdtc-hey-snips diff --git a/README_cn.md b/README_cn.md index 2db883b5a..27b239123 100644 --- a/README_cn.md +++ b/README_cn.md @@ -164,13 +164,16 @@ ### 近期更新 +- 🎉 2022.11.17: TTS 新增[高质量男性音色](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660)。 +- 🔥 2022.11.07: 新增 [U2/U2++ 高性能流式 ASR C++ 部署](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech)。 +- 👑 2022.11.01: [中英文混合 TTS](./examples/zh_en_tts/tts3) 新增 [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) 模块。 - 🔥 2022.10.26: TTS 新增[韵律预测](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy)功能。 - 🎉 2022.10.21: TTS 中文文本前端新增 [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) 功能。 - 👑 2022.10.11: 新增 [Wav2vec2ASR](./examples/librispeech/asr3), 在 LibriSpeech 上针对 ASR 任务对 wav2vec2.0 的 finetuning。 -- 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 ERNIE-SAT 到 [PaddleSpeech 网页应用](./demos/speech_web)。 +- 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 [ERNIE-SAT](https://arxiv.org/abs/2211.03545) 到 [PaddleSpeech 网页应用](./demos/speech_web)。 - ⚡ 2022.09.09: 新增基于 ECAPA-TDNN 声纹模型的 AISHELL-3 Voice Cloning [示例](./examples/aishell3/vc2)。 - ⚡ 2022.08.25: 发布 TTS [finetune](./examples/other/tts_finetune/tts3) 示例。 -- 🔥 2022.08.22: 新增 ERNIE-SAT 模型: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat)。 +- 🔥 2022.08.22: 新增 [ERNIE-SAT](https://arxiv.org/abs/2211.03545) 模型: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat)。 - 🔥 2022.08.15: 将 [g2pW](https://github.com/GitYCC/g2pW) 引入 TTS 中文文本前端。 - 🔥 2022.08.09: 发布[中英文混合 TTS](./examples/zh_en_tts/tts3)。 - ⚡ 2022.08.03: TTS CLI 新增 ONNXRuntime 推理方式。 @@ -575,7 +578,7 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声 - ERNIE-SAT + ERNIE-SAT VCTK / AISHELL-3 / ZH_EN ERNIE-SAT-vctk / ERNIE-SAT-aishell3 / ERNIE-SAT-zh_en @@ -696,9 +699,9 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声 - + -**唤醒** +**语音唤醒** @@ -711,11 +714,11 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声 - + - + diff --git a/demos/asr_deployment/README.md b/demos/asr_deployment/README.md new file mode 100644 index 000000000..9d36f19f2 --- /dev/null +++ b/demos/asr_deployment/README.md @@ -0,0 +1,100 @@ +([简体中文](./README_cn.md)|English) +# ASR Deployment by SpeechX + +## Introduction + +ASR deployment support U2/U2++/Deepspeech2 asr model using c++, which is good practice in industry deployment. + +More info about SpeechX, please see [here](../../speechx/README.md). + +## Usage +### 1. Environment + +* python - 3.7 +* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7` +* os - Ubuntu 16.04.7 LTS +* gcc/g++/gfortran - 8.2.0 +* cmake - 3.16.0 + +More info please see [here](../../speechx/README.md). + +### 2. Compile SpeechX + +Please see [here](../../speechx/README.md). + +### 3. Usage + +For u2++ asr deployment example, please to see [here](../../speechx/examples/u2pp_ol/wenetspeech/). + +First go to `speechx/speechx/examples/u2pp_ol/wenetspeech` dir. 
+ +- Source path.sh + ```bash + source path.sh + ``` + +- Download Model, Prepare test data and cmvn + ```bash + run.sh --stage 0 --stop_stage 1 + ``` + +- Decode with WAV + + ```bash + # FP32 + ./local/recognizer.sh + + # INT8 + ./local/recognizer_quant.sh + ``` + + Output: + ```bash + I1026 16:13:24.683531 48038 u2_recognizer_main.cc:55] utt: BAC009S0916W0495 + I1026 16:13:24.683578 48038 u2_recognizer_main.cc:56] wav dur: 4.17119 sec. + I1026 16:13:24.683595 48038 u2_recognizer_main.cc:64] wav len (sample): 66739 + I1026 16:13:25.037652 48038 u2_recognizer_main.cc:87] Pratial result: 3 这令 + I1026 16:13:25.043697 48038 u2_recognizer_main.cc:87] Pratial result: 4 这令 + I1026 16:13:25.222124 48038 u2_recognizer_main.cc:87] Pratial result: 5 这令被贷款 + I1026 16:13:25.228385 48038 u2_recognizer_main.cc:87] Pratial result: 6 这令被贷款 + I1026 16:13:25.414669 48038 u2_recognizer_main.cc:87] Pratial result: 7 这令被贷款的员工 + I1026 16:13:25.420714 48038 u2_recognizer_main.cc:87] Pratial result: 8 这令被贷款的员工 + I1026 16:13:25.608129 48038 u2_recognizer_main.cc:87] Pratial result: 9 这令被贷款的员工们请 + I1026 16:13:25.801620 48038 u2_recognizer_main.cc:87] Pratial result: 10 这令被贷款的员工们请食难安 + I1026 16:13:25.804101 48038 feature_cache.h:44] set finished + I1026 16:13:25.804128 48038 feature_cache.h:51] compute last feats done. + I1026 16:13:25.948771 48038 u2_recognizer_main.cc:87] Pratial result: 11 这令被贷款的员工们请食难安 + I1026 16:13:26.246963 48038 u2_recognizer_main.cc:113] BAC009S0916W0495 这令被贷款的员工们请食难安 + ``` + +## Result + +> CER compute under aishell-test. +> RTF compute with feature and decoder, which is more end to end. +> Machine Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz avx512_vnni + +### FP32 + +``` +Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294 +Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294 +English -> 0.00 % N=0 C=0 S=0 D=0 I=0 +Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 +``` + +``` +RTF is: 0.315337 +``` + +### INT8 + +``` +Overall -> 5.83 % N=104765 C=98943 S=5675 D=147 I=286 +Mandarin -> 5.83 % N=104762 C=98943 S=5672 D=147 I=286 +English -> 0.00 % N=0 C=0 S=0 D=0 I=0 +Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 +``` + +``` +RTF is: 0.269674 +``` diff --git a/demos/asr_deployment/README_cn.md b/demos/asr_deployment/README_cn.md new file mode 100644 index 000000000..ee4aa8489 --- /dev/null +++ b/demos/asr_deployment/README_cn.md @@ -0,0 +1,96 @@ +([简体中文](./README_cn.md)|English) +# 基于SpeechX 的 ASR 部署 + +## 简介 + +支持 U2/U2++/Deepspeech2 模型的 C++ 部署,其在工业实践中经常被用到。 + +更多 Speechx 信息可以参看[文档](../../speechx/README.md)。 + +## 使用 +### 1. 环境 + +* python - 3.7 +* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7` +* os - Ubuntu 16.04.7 LTS +* gcc/g++/gfortran - 8.2.0 +* cmake - 3.16.0 + +更多信息可以参看[文档](../../speechx/README.md)。 + +### 2. 编译 SpeechX + +更多信息可以参看[文档](../../speechx/README.md)。 + +### 3. 例子 + +u2++ 识别部署参看[这里](../../speechx/examples/u2pp_ol/wenetspeech/)。 + +以下是在 `speechx/speechx/examples/u2pp_ol/wenetspeech`. + +- Source path.sh + ```bash + source path.sh + ``` + +- 下载模型,准备测试数据和cmvn文件 + ```bash + run.sh --stage 0 --stop_stage 1 + ``` + +- 解码 + + ```bash + # FP32 + ./local/recognizer.sh + + # INT8 + ./local/recognizer_quant.sh + ``` + + 输出: + ```bash + I1026 16:13:24.683531 48038 u2_recognizer_main.cc:55] utt: BAC009S0916W0495 + I1026 16:13:24.683578 48038 u2_recognizer_main.cc:56] wav dur: 4.17119 sec. 
+ I1026 16:13:24.683595 48038 u2_recognizer_main.cc:64] wav len (sample): 66739 + I1026 16:13:25.037652 48038 u2_recognizer_main.cc:87] Pratial result: 3 这令 + I1026 16:13:25.043697 48038 u2_recognizer_main.cc:87] Pratial result: 4 这令 + I1026 16:13:25.222124 48038 u2_recognizer_main.cc:87] Pratial result: 5 这令被贷款 + I1026 16:13:25.228385 48038 u2_recognizer_main.cc:87] Pratial result: 6 这令被贷款 + I1026 16:13:25.414669 48038 u2_recognizer_main.cc:87] Pratial result: 7 这令被贷款的员工 + I1026 16:13:25.420714 48038 u2_recognizer_main.cc:87] Pratial result: 8 这令被贷款的员工 + I1026 16:13:25.608129 48038 u2_recognizer_main.cc:87] Pratial result: 9 这令被贷款的员工们请 + I1026 16:13:25.801620 48038 u2_recognizer_main.cc:87] Pratial result: 10 这令被贷款的员工们请食难安 + I1026 16:13:25.804101 48038 feature_cache.h:44] set finished + I1026 16:13:25.804128 48038 feature_cache.h:51] compute last feats done. + I1026 16:13:25.948771 48038 u2_recognizer_main.cc:87] Pratial result: 11 这令被贷款的员工们请食难安 + I1026 16:13:26.246963 48038 u2_recognizer_main.cc:113] BAC009S0916W0495 这令被贷款的员工们请食难安 + ``` + +## 结果 + +> CER 测试集为 aishell-test +> RTF 计算包含提特征和解码 +> 测试机器: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz avx512_vnni + +### FP32 + +``` +Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294 +Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294 +English -> 0.00 % N=0 C=0 S=0 D=0 I=0 +Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 +``` + +``` +RTF is: 0.315337 +``` + +### INT8 + +``` +Overall -> 5.87 % N=104765 C=98909 S=5711 D=145 I=289 +Mandarin -> 5.86 % N=104762 C=98909 S=5708 D=145 I=289 +English -> 0.00 % N=0 C=0 S=0 D=0 I=0 +Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 +``` diff --git a/docs/source/cls/custom_dataset.md b/docs/source/cls/custom_dataset.md index e39dcf12d..b7c06cd7a 100644 --- a/docs/source/cls/custom_dataset.md +++ b/docs/source/cls/custom_dataset.md @@ -108,7 +108,7 @@ for epoch in range(1, epochs + 1): optimizer.clear_grad() # Calculate loss - avg_loss = loss.numpy()[0] + avg_loss = float(loss) # Calculate metrics preds = paddle.argmax(logits, axis=1) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 2f3c9d098..45193701d 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -22,7 +22,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Model | Pre-Train Method | Pre-Train Data | Finetune Data | Size | Descriptions | CER | WER | Example Link | :-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----: | :-----: | :-----: | [Wav2vec2-large-960h-lv60-self Model](https://paddlespeech.bj.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | - | 1.18 GB |Pre-trained Wav2vec2.0 Model | - | - | - | -[Wav2vec2ASR-large-960h-librispeech Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.0.model.tar.gz) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | Librispeech (960 h) | 1.18 GB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | - | 0.0189 | [Wav2vecASR Librispeech ASR3](../../examples/librispeech/asr3) | +[Wav2vec2ASR-large-960h-librispeech Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.1.model.tar.gz) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | Librispeech (960 h) | 718 MB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | - | 0.0189 | [Wav2vecASR Librispeech ASR3](../../examples/librispeech/asr3) | ### Language Model 
based on NGram Language Model | Training Data | Token-based | Size | Descriptions @@ -53,6 +53,8 @@ FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/P FastSpeech2| LJSpeech |[fastspeech2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts3)|[fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)|[fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip)
[fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip)|145MB| FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip)
[fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) | 145MB| FastSpeech2| ZH_EN |[fastspeech2-zh_en](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/zh_en_tts/tts3)|[fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)|[fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip)
[fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) | 145MB| +FastSpeech2| Male ||[fastspeech2_male_ckpt_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.3.0.zip)| | | + ### Vocoders @@ -70,6 +72,7 @@ HiFiGAN | LJSpeech |[HiFiGAN-ljspeech](https://github.com/PaddlePaddle/PaddleSpe HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)|[hifigan_aishell3_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip)
[hifigan_aishell3_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip)|46MB| HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip)|[hifigan_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip)
[hifigan_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip)|46MB| WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB| +Parallel WaveGAN| Male ||[pwg_male_ckpt_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.3.0.zip)||| ### Voice Cloning diff --git a/docs/tutorial/cls/cls_tutorial.ipynb b/docs/tutorial/cls/cls_tutorial.ipynb index 56b488adc..3cee64991 100644 --- a/docs/tutorial/cls/cls_tutorial.ipynb +++ b/docs/tutorial/cls/cls_tutorial.ipynb @@ -509,7 +509,7 @@ " optimizer.clear_grad()\n", "\n", " # Calculate loss\n", - " avg_loss += loss.numpy()[0]\n", + " avg_loss += float(loss)\n", "\n", " # Calculate metrics\n", " preds = paddle.argmax(logits, axis=1)\n", diff --git a/examples/aishell3/ernie_sat/README.md b/examples/aishell3/ernie_sat/README.md index 9b7768985..bd5964c3a 100644 --- a/examples/aishell3/ernie_sat/README.md +++ b/examples/aishell3/ernie_sat/README.md @@ -1,5 +1,5 @@ # ERNIE-SAT with AISHELL-3 dataset -ERNIE-SAT speech-text joint pretraining framework, which achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks, It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning. +[ERNIE-SAT](https://arxiv.org/abs/2211.03545) speech-text joint pretraining framework, which achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks, It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning. ## Model Framework In ERNIE-SAT, we propose two innovations: diff --git a/examples/aishell3_vctk/ernie_sat/README.md b/examples/aishell3_vctk/ernie_sat/README.md index 321957835..fbf9244d1 100644 --- a/examples/aishell3_vctk/ernie_sat/README.md +++ b/examples/aishell3_vctk/ernie_sat/README.md @@ -1,5 +1,5 @@ # ERNIE-SAT with AISHELL-3 and VCTK dataset -ERNIE-SAT speech-text joint pretraining framework, which achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks, It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning. +[ERNIE-SAT](https://arxiv.org/abs/2211.03545) speech-text joint pretraining framework, which achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks, It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning. 
## Model Framework In ERNIE-SAT, we propose two innovations: diff --git a/examples/librispeech/asr3/conf/wav2vec2ASR.yaml b/examples/librispeech/asr3/conf/wav2vec2ASR.yaml index b19881b70..c45bd692a 100644 --- a/examples/librispeech/asr3/conf/wav2vec2ASR.yaml +++ b/examples/librispeech/asr3/conf/wav2vec2ASR.yaml @@ -70,7 +70,6 @@ train_manifest: data/manifest.train dev_manifest: data/manifest.dev test_manifest: data/manifest.test-clean - ########################################### # Dataloader # ########################################### @@ -95,6 +94,12 @@ dist_sampler: True shortest_first: True return_lens_rate: True +############################################ +# Data Augmentation # +############################################ +audio_augment: # for raw audio + sample_rate: 16000 + speeds: [95, 100, 105] ########################################### # Training # @@ -115,6 +120,3 @@ log_interval: 1 checkpoint: kbest_n: 50 latest_n: 5 -augment: True - - diff --git a/examples/other/mfa/README.md b/examples/other/mfa/README.md index c24524ab4..216d1275b 100644 --- a/examples/other/mfa/README.md +++ b/examples/other/mfa/README.md @@ -4,3 +4,6 @@ Run the following script to get started, for more detail, please see `run.sh`. ```bash ./run.sh ``` +# Rhythm tags for MFA +If you want to get rhythm tags with duration through MFA tool, you may add flag `--rhy-with-duration` in the first two commands in `run.sh` +Note that only CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA. diff --git a/examples/other/mfa/local/generate_lexicon.py b/examples/other/mfa/local/generate_lexicon.py index e9445665b..3deb24701 100644 --- a/examples/other/mfa/local/generate_lexicon.py +++ b/examples/other/mfa/local/generate_lexicon.py @@ -182,12 +182,17 @@ if __name__ == "__main__": "--with-tone", action="store_true", help="whether to consider tone.") parser.add_argument( "--with-r", action="store_true", help="whether to consider erhua.") + parser.add_argument( + "--rhy-with-duration", + action="store_true", ) args = parser.parse_args() lexicon = generate_lexicon(args.with_tone, args.with_r) symbols = generate_symbols(lexicon) with open(args.output + ".lexicon", 'wt') as f: + if args.rhy_with_duration: + f.write("sp1 sp1\nsp2 sp2\nsp3 sp3\nsp4 sp4\n") for k, v in lexicon.items(): f.write(f"{k} {v}\n") diff --git a/examples/other/mfa/local/reorganize_baker.py b/examples/other/mfa/local/reorganize_baker.py index 153e01d13..0e0035bda 100644 --- a/examples/other/mfa/local/reorganize_baker.py +++ b/examples/other/mfa/local/reorganize_baker.py @@ -23,6 +23,7 @@ for more details. 
""" import argparse import os +import re import shutil from concurrent.futures import ThreadPoolExecutor from pathlib import Path @@ -32,6 +33,22 @@ import librosa import soundfile as sf from tqdm import tqdm +repalce_dict = { + ";": "", + "。": "", + ":": "", + "—": "", + ")": "", + ",": "", + "“": "", + "(": "", + "、": "", + "…": "", + "!": "", + "?": "", + "”": "" +} + def get_transcripts(path: Union[str, Path]): transcripts = {} @@ -55,9 +72,13 @@ def resample_and_save(source, target, sr=16000): def reorganize_baker(root_dir: Union[str, Path], output_dir: Union[str, Path]=None, - resample_audio=False): + resample_audio=False, + rhy_dur=False): root_dir = Path(root_dir).expanduser() - transcript_path = root_dir / "ProsodyLabeling" / "000001-010000.txt" + if rhy_dur: + transcript_path = root_dir / "ProsodyLabeling" / "000001-010000_rhy.txt" + else: + transcript_path = root_dir / "ProsodyLabeling" / "000001-010000.txt" transcriptions = get_transcripts(transcript_path) wave_dir = root_dir / "Wave" @@ -92,6 +113,46 @@ def reorganize_baker(root_dir: Union[str, Path], print("Done!") +def insert_rhy(sentence_first, sentence_second): + sub = '#' + return_words = [] + sentence_first = sentence_first.translate(str.maketrans(repalce_dict)) + rhy_idx = [substr.start() for substr in re.finditer(sub, sentence_first)] + re_rhy_idx = [] + sentence_first_ = sentence_first.replace("#1", "").replace( + "#2", "").replace("#3", "").replace("#4", "") + sentence_seconds = sentence_second.split(" ") + for i, w in enumerate(rhy_idx): + re_rhy_idx.append(w - i * 2) + i = 0 + # print("re_rhy_idx: ", re_rhy_idx) + for sentence_s in (sentence_seconds): + return_words.append(sentence_s) + if i < len(re_rhy_idx) and len(return_words) - i == re_rhy_idx[i]: + return_words.append("sp" + sentence_first[rhy_idx[i] + 1:rhy_idx[i] + + 2]) + i = i + 1 + return return_words + + +def normalize_rhy(root_dir: Union[str, Path]): + root_dir = Path(root_dir).expanduser() + transcript_path = root_dir / "ProsodyLabeling" / "000001-010000.txt" + target_transcript_path = root_dir / "ProsodyLabeling" / "000001-010000_rhy.txt" + + with open(transcript_path) as f: + lines = f.readlines() + + with open(target_transcript_path, 'wt') as f: + for i in range(0, len(lines), 2): + sentence_first = lines[i] #第一行直接保存 + f.write(sentence_first) + transcription = lines[i + 1].strip() + f.write("\t" + " ".join( + insert_rhy(sentence_first.split('\t')[1], transcription)) + + "\n") + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Reorganize Baker dataset for MFA") @@ -104,6 +165,12 @@ if __name__ == "__main__": "--resample-audio", action="store_true", help="To resample audio files or just copy them") + parser.add_argument( + "--rhy-with-duration", + action="store_true", ) args = parser.parse_args() - reorganize_baker(args.root_dir, args.output_dir, args.resample_audio) + if args.rhy_with_duration: + normalize_rhy(args.root_dir) + reorganize_baker(args.root_dir, args.output_dir, args.resample_audio, + args.rhy_with_duration) diff --git a/examples/other/tn/data/textnorm_test_cases.txt b/examples/other/tn/data/textnorm_test_cases.txt index e9a479b47..17e90d0b6 100644 --- a/examples/other/tn/data/textnorm_test_cases.txt +++ b/examples/other/tn/data/textnorm_test_cases.txt @@ -122,4 +122,6 @@ iPad Pro的秒控键盘这次也推出白色版本。|iPad Pro的秒控键盘这 近期也一反常态地发表看空言论|近期也一反常态地发表看空言论 985|九八五 12~23|十二到二十三 -12-23|十二到二十三 \ No newline at end of file +12-23|十二到二十三 +25cm²|二十五平方厘米 +25m|米 \ No newline at end of file diff --git 
a/examples/other/tts_finetune/tts3/README.md b/examples/other/tts_finetune/tts3/README.md index fa691764c..8564af5f6 100644 --- a/examples/other/tts_finetune/tts3/README.md +++ b/examples/other/tts_finetune/tts3/README.md @@ -55,7 +55,7 @@ If you want to finetune Chinese pretrained model, you need to prepare Chinese da 000001|ka2 er2 pu3 pei2 wai4 sun1 wan2 hua2 ti1 ``` -Here is an example of the first 200 data of csmsc. +Here is a Chinese data example of the first 200 data of csmsc. ```bash mkdir -p input && cd input @@ -69,7 +69,7 @@ If you want to finetune English pretrained model, you need to prepare English da LJ001-0001|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition ``` -Here is an example of the first 200 data of ljspeech. +Here is an English data example of the first 200 data of ljspeech. ```bash mkdir -p input && cd input @@ -78,7 +78,7 @@ unzip ljspeech_mini.zip cd ../ ``` -If you want to finetune Chinese-English Mixed pretrained model, you need to prepare Chinese data or English data. Here is an example of the first 12 data of SSB0005 (the speaker of aishell3). +If you want to finetune Chinese-English Mixed pretrained model, you need to prepare Chinese data or English data. Here is a Chinese data example of the first 12 data of SSB0005 (the speaker of aishell3). ```bash mkdir -p input && cd input diff --git a/examples/other/tts_finetune/tts3/run_mix.sh b/examples/other/tts_finetune/tts3/run_mix.sh old mode 100644 new mode 100755 index 71008ef5b..960278a53 --- a/examples/other/tts_finetune/tts3/run_mix.sh +++ b/examples/other/tts_finetune/tts3/run_mix.sh @@ -108,3 +108,4 @@ if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then --spk_id=$replace_spkid fi + diff --git a/examples/vctk/ernie_sat/README.md b/examples/vctk/ernie_sat/README.md index 94c7ae25d..1808e2074 100644 --- a/examples/vctk/ernie_sat/README.md +++ b/examples/vctk/ernie_sat/README.md @@ -1,5 +1,5 @@ # ERNIE-SAT with VCTK dataset -ERNIE-SAT speech-text joint pretraining framework, which achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks, It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning. +[ERNIE-SAT](https://arxiv.org/abs/2211.03545) speech-text joint pretraining framework, which achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks, It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning. 
## Model Framework In ERNIE-SAT, we propose two innovations: diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 3eb597156..707518c05 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -67,6 +67,7 @@ class TTSExecutor(BaseExecutor): 'fastspeech2_mix', 'tacotron2_csmsc', 'tacotron2_ljspeech', + 'fastspeech2_male', ], help='Choose acoustic model type of tts task.') self.parser.add_argument( @@ -122,6 +123,7 @@ class TTSExecutor(BaseExecutor): 'hifigan_aishell3', 'hifigan_vctk', 'wavernn_csmsc', + 'pwgan_male', ], help='Choose vocoder type of tts task.') diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index fba38a01c..133893081 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -101,7 +101,7 @@ if __name__ == "__main__": optimizer.clear_grad() # Calculate loss - avg_loss += loss.numpy()[0] + avg_loss += float(loss) # Calculate metrics preds = paddle.argmax(logits, axis=1) diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 94e45d590..d5bb5e020 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -110,7 +110,7 @@ if __name__ == '__main__': optimizer.clear_grad() # Calculate loss - avg_loss += loss.numpy()[0] + avg_loss += float(loss) # Calculate metrics num_corrects += corrects diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index df50a6a9d..3fad84b13 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -723,6 +723,22 @@ tts_dynamic_pretrained_models = { 'speaker_id_map.txt', }, }, + "fastspeech2_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.3.0.zip', + 'md5': + 'a4b1a2f667b878ec8f67375357b04282', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_76000.pdz', + 'speech_stats': + 'speech_stats.npy', + 'phones_dict': + 'phone_id_map.txt', + }, + }, # tacotron2 "tacotron2_csmsc-zh": { '1.0': { @@ -813,6 +829,20 @@ tts_dynamic_pretrained_models = { 'feats_stats.npy', }, }, + "pwgan_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.3.0.zip', + 'md5': + 'c98cdb889c809973f8cc764437311132', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_200000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, + }, # mb_melgan "mb_melgan_csmsc-zh": { '1.0': { diff --git a/paddlespeech/s2t/exps/wav2vec2/model.py b/paddlespeech/s2t/exps/wav2vec2/model.py index 933e268ed..4f6bc0c5b 100644 --- a/paddlespeech/s2t/exps/wav2vec2/model.py +++ b/paddlespeech/s2t/exps/wav2vec2/model.py @@ -71,7 +71,8 @@ class Wav2Vec2ASRTrainer(Trainer): wavs_lens_rate = wavs_lens / wav.shape[1] target_lens_rate = target_lens / target.shape[1] wav = wav[:, :, 0] - wav = self.speech_augmentation(wav, wavs_lens_rate) + if hasattr(train_conf, 'speech_augment'): + wav = self.speech_augmentation(wav, wavs_lens_rate) loss = self.model(wav, wavs_lens_rate, target, target_lens_rate) # loss div by `batch_size * accum_grad` loss /= train_conf.accum_grad @@ -277,7 +278,9 @@ class Wav2Vec2ASRTrainer(Trainer): logger.info("Setup model!") # setup speech augmentation for wav2vec2 - self.speech_augmentation = TimeDomainSpecAugment() + if hasattr(config, 'audio_augment') and self.train: + self.speech_augmentation = TimeDomainSpecAugment( + **config.audio_augment) if 
not self.train: return diff --git a/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py b/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py index 78a0782e7..ac9bf45db 100644 --- a/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py +++ b/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py @@ -641,14 +641,11 @@ class DropChunk(nn.Layer): class TimeDomainSpecAugment(nn.Layer): """A time-domain approximation of the SpecAugment algorithm. - This augmentation module implements three augmentations in the time-domain. - 1. Drop chunks of the audio (zero amplitude or white noise) 2. Drop frequency bands (with band-drop filters) 3. Speed peturbation (via resampling to slightly different rate) - Arguments --------- perturb_prob : float from 0 to 1 @@ -677,7 +674,6 @@ class TimeDomainSpecAugment(nn.Layer): drop_chunk_noise_factor : float The noise factor used to scale the white noise inserted, relative to the average amplitude of the utterance. Default 0 (no noise inserted). - Example ------- >>> inputs = paddle.randn([10, 16000]) @@ -718,7 +714,6 @@ class TimeDomainSpecAugment(nn.Layer): def forward(self, waveforms, lengths): """Returns the distorted waveforms. - Arguments --------- waveforms : tensor diff --git a/paddlespeech/t2s/exps/sentences_ssml.txt b/paddlespeech/t2s/exps/sentences_ssml.txt new file mode 100644 index 000000000..e3614f224 --- /dev/null +++ b/paddlespeech/t2s/exps/sentences_ssml.txt @@ -0,0 +1,10 @@ +0001 考古人员西布达拉宫里发现一个被隐的装有宝箱子。 +0002 有人询问中国银北京分行行长是否叫任我。 +0003 市委书记亲自领审计员对这家公司进行财务审计,发现企业的利润数据虚假。 +0004 学生们对代理解不深刻,特别是小点,在数数时容易弄错。 +0005 军从小学习武术,擅散打,大后参军,担任连。 +0006 我说她了工资,她就红着脸,摇头否认。 +0007 请把这封信交团长,告诉他,前线的供一定要有保障。 +0008 矿下的道,与北京四合院的小有点相似。 +0009 他常叹自己命,几亩田,种点。 +0010 小明对天相很有研究,在宿舍说了一宿有关星宿的常识。 \ No newline at end of file diff --git a/paddlespeech/t2s/frontend/g2pw/onnx_api.py b/paddlespeech/t2s/frontend/g2pw/onnx_api.py index 4e6fad4e5..47c26a610 100644 --- a/paddlespeech/t2s/frontend/g2pw/onnx_api.py +++ b/paddlespeech/t2s/frontend/g2pw/onnx_api.py @@ -100,7 +100,7 @@ class G2PWOnnxConverter: ] self.non_polyphonic = { '一', '不', '和', '咋', '嗲', '剖', '差', '攢', '倒', '難', '奔', '勁', '拗', - '肖', '瘙', '誒', '泊' + '肖', '瘙', '誒', '泊', '听' } self.non_monophonic = {'似', '攢'} self.monophonic_chars = [ diff --git a/paddlespeech/t2s/frontend/zh_normalization/quantifier.py b/paddlespeech/t2s/frontend/zh_normalization/quantifier.py index 268d7229b..598030e43 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/quantifier.py +++ b/paddlespeech/t2s/frontend/zh_normalization/quantifier.py @@ -18,6 +18,25 @@ from .num import num2str # 温度表达式,温度会影响负号的读法 # -3°C 零下三度 RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)') +measure_dict = { + "cm2": "平方厘米", + "cm²": "平方厘米", + "cm3": "立方厘米", + "cm³": "立方厘米", + "cm": "厘米", + "db": "分贝", + "ds": "毫秒", + "kg": "千克", + "km": "千米", + "m2": "平方米", + "m²": "平方米", + "m³": "立方米", + "m3": "立方米", + "ml": "毫升", + "m": "米", + "mm": "毫米", + "s": "秒" +} def replace_temperature(match) -> str: @@ -35,3 +54,10 @@ def replace_temperature(match) -> str: unit: str = "摄氏度" if unit == "摄氏度" else "度" result = f"{sign}{temperature}{unit}" return result + + +def replace_measure(sentence) -> str: + for q_notation in measure_dict: + if q_notation in sentence: + sentence = sentence.replace(q_notation, measure_dict[q_notation]) + return sentence diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py index bc663c70d..8f8e3b07d 100644 --- 
a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py +++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py @@ -46,6 +46,7 @@ from .phonecode import RE_TELEPHONE from .phonecode import replace_mobile from .phonecode import replace_phone from .quantifier import RE_TEMPERATURE +from .quantifier import replace_measure from .quantifier import replace_temperature @@ -91,6 +92,7 @@ class TextNormalizer(): sentence = RE_TIME.sub(replace_time, sentence) sentence = RE_TEMPERATURE.sub(replace_temperature, sentence) + sentence = replace_measure(sentence) sentence = RE_FRAC.sub(replace_frac, sentence) sentence = RE_PERCENTAGE.sub(replace_percentage, sentence) sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence) diff --git a/speechx/examples/codelab/u2/utils b/speechx/examples/codelab/u2/utils new file mode 120000 index 000000000..23cef9612 --- /dev/null +++ b/speechx/examples/codelab/u2/utils @@ -0,0 +1 @@ +../../../../utils \ No newline at end of file diff --git a/speechx/examples/u2pp_ol/wenetspeech/README.md b/speechx/examples/u2pp_ol/wenetspeech/README.md index 9a8f8af51..b90b8e201 100644 --- a/speechx/examples/u2pp_ol/wenetspeech/README.md +++ b/speechx/examples/u2pp_ol/wenetspeech/README.md @@ -2,10 +2,10 @@ ## Testing with Aishell Test Data -## Download wav and model +### Download wav and model ``` -run.sh --stop_stage 0 +./run.sh --stop_stage 0 ``` ### compute feature @@ -22,7 +22,6 @@ run.sh --stop_stage 0 ### decoding using wav - ``` ./run.sh --stage 3 --stop_stage 3 ``` diff --git a/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md b/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md index 6a8e8c46d..5b33f3641 100644 --- a/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md +++ b/speechx/examples/u2pp_ol/wenetspeech/RESULTS.md @@ -2,9 +2,11 @@ 7176 utts, duration 36108.9 sec. -## Attention Rescore +## U2++ Attention Rescore -### u2++ FP32 +> Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz, support `avx512_vnni` +> RTF with feature and decoder which is more end to end. +### FP32 #### CER @@ -17,20 +19,29 @@ Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 #### RTF -> RTF with feature and decoder which is more end to end. - -* Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz, support `avx512_vnni` - ``` I1027 10:52:38.662868 51665 u2_recognizer_main.cc:122] total wav duration is: 36108.9 sec I1027 10:52:38.662858 51665 u2_recognizer_main.cc:121] total cost:11169.1 sec I1027 10:52:38.662876 51665 u2_recognizer_main.cc:123] RTF is: 0.309318 ``` -* Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, not support `avx512_vnni` +### INT8 + +> RTF relative improve 12.8%, which count feature and decoder time. 
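+> (The 12.8% figure follows from the reported numbers: (0.309318 - 0.269674) / 0.309318 ≈ 0.128, using the FP32 RTF above and the INT8 RTF below.)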
+ +#### CER + +``` +Overall -> 5.83 % N=104765 C=98943 S=5675 D=147 I=286 +Mandarin -> 5.83 % N=104762 C=98943 S=5672 D=147 I=286 +English -> 0.00 % N=0 C=0 S=0 D=0 I=0 +Other -> 100.00 % N=3 C=0 S=3 D=0 I=0 +``` + +#### RTF ``` -I1026 16:13:26.247121 48038 u2_recognizer_main.cc:123] total wav duration is: 36108.9 sec -I1026 16:13:26.247130 48038 u2_recognizer_main.cc:124] total decode cost:13656.7 sec -I1026 16:13:26.247138 48038 u2_recognizer_main.cc:125] RTF is: 0.378208 +I1110 09:59:52.551712 37249 u2_recognizer_main.cc:122] total wav duration is: 36108.9 sec +I1110 09:59:52.551717 37249 u2_recognizer_main.cc:123] total decode cost:9737.63 sec +I1110 09:59:52.551723 37249 u2_recognizer_main.cc:124] RTF is: 0.269674 ``` diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh b/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh index e9c81009c..059ed1b36 100755 --- a/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh +++ b/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh @@ -9,8 +9,9 @@ nj=20 mkdir -p $exp ckpt_dir=./data/model model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/ +text=$data/test/text -utils/run.pl JOB=1:$nj $data/split${nj}/JOB/decoder.fbank.wolm.log \ +utils/run.pl JOB=1:$nj $data/split${nj}/JOB/decoder.log \ ctc_prefix_beam_search_decoder_main \ --model_path=$model_dir/export.jit \ --vocab_path=$model_dir/unit.txt \ @@ -20,6 +21,6 @@ ctc_prefix_beam_search_decoder_main \ --feature_rspecifier=scp:$data/split${nj}/JOB/fbank.scp \ --result_wspecifier=ark,t:$data/split${nj}/JOB/result_decode.ark -cat $data/split${nj}/*/result_decode.ark > $exp/${label_file} -utils/compute-wer.py --char=1 --v=1 $text $exp/${label_file} > $exp/${wer} -tail -n 7 $exp/${wer} \ No newline at end of file +cat $data/split${nj}/*/result_decode.ark > $exp/aishell.decode.rsl +utils/compute-wer.py --char=1 --v=1 $text $exp/aishell.decode.rsl > $exp/aishell.decode.err +tail -n 7 $exp/aishell.decode.err \ No newline at end of file diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh b/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh index 5455b5c9b..f947e6b17 100755 --- a/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh +++ b/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh @@ -1,18 +1,21 @@ #!/bin/bash -set -x set -e . path.sh +nj=20 data=data exp=exp + mkdir -p $exp ckpt_dir=./data/model model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/ +utils/run.pl JOB=1:$nj $data/split${nj}/JOB/nnet.log \ u2_nnet_main \ --model_path=$model_dir/export.jit \ - --feature_rspecifier=ark,t:$exp/fbank.ark \ + --vocab_path=$model_dir/unit.txt \ + --feature_rspecifier=ark,t:${data}/split${nj}/JOB/fbank.ark \ --nnet_decoder_chunk=16 \ --receptive_field_length=7 \ --subsampling_rate=4 \ @@ -20,4 +23,3 @@ u2_nnet_main \ --nnet_encoder_outs_wspecifier=ark,t:$exp/encoder_outs.ark \ --nnet_prob_wspecifier=ark,t:$exp/logprobs.ark echo "u2 nnet decode." 
- diff --git a/speechx/examples/u2pp_ol/wenetspeech/run.sh b/speechx/examples/u2pp_ol/wenetspeech/run.sh index 2bc855dec..870c5deeb 100755 --- a/speechx/examples/u2pp_ol/wenetspeech/run.sh +++ b/speechx/examples/u2pp_ol/wenetspeech/run.sh @@ -24,8 +24,6 @@ fi ckpt_dir=$data/model -model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/ - if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then # download u2pp model diff --git a/speechx/speechx/frontend/audio/data_cache.h b/speechx/speechx/frontend/audio/data_cache.h index f538df1dd..5fe5e4fe0 100644 --- a/speechx/speechx/frontend/audio/data_cache.h +++ b/speechx/speechx/frontend/audio/data_cache.h @@ -32,7 +32,6 @@ class DataCache : public FrontendInterface { // accept waves/feats void Accept(const kaldi::VectorBase& inputs) override { data_ = inputs; - SetDim(data_.Dim()); } bool Read(kaldi::Vector* feats) override { @@ -41,7 +40,6 @@ class DataCache : public FrontendInterface { } (*feats) = data_; data_.Resize(0); - SetDim(data_.Dim()); return true; } diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/nnet/decodable.cc index 7f6859082..5fe2b9842 100644 --- a/speechx/speechx/nnet/decodable.cc +++ b/speechx/speechx/nnet/decodable.cc @@ -71,6 +71,7 @@ bool Decodable::AdvanceChunk() { VLOG(3) << "decodable exit;"; return false; } + CHECK_GE(frontend_->Dim(), 0); VLOG(1) << "AdvanceChunk feat cost: " << timer.Elapsed() << " sec."; VLOG(2) << "Forward in " << features.Dim() / frontend_->Dim() << " feats."; diff --git a/tests/test_tipc/configs/mdtc/train_infer_python.txt b/tests/test_tipc/configs/mdtc/train_infer_python.txt index 7a5f658ee..6fb8c3484 100644 --- a/tests/test_tipc/configs/mdtc/train_infer_python.txt +++ b/tests/test_tipc/configs/mdtc/train_infer_python.txt @@ -49,9 +49,3 @@ null:null null:null null:null null:null -===========================train_benchmark_params========================== -batch_size:16|30 -fp_items:fp32 -iteration:50 ---profiler-options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile" -flags:null diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index c6837c303..cb24fa614 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -16,6 +16,7 @@ paddlespeech asr --model conformer_aishell --input ./zh.wav paddlespeech asr --model conformer_online_aishell --input ./zh.wav paddlespeech asr --model conformer_online_wenetspeech --input ./zh.wav paddlespeech asr --model conformer_online_multicn --input ./zh.wav +paddlespeech asr --model conformer_u2pp_online_wenetspeech --lang zh --input zh.wav paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav paddlespeech asr --model deepspeech2offline_aishell --input ./zh.wav paddlespeech asr --model deepspeech2online_wenetspeech --input ./zh.wav @@ -53,6 +54,7 @@ paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like paddlespeech tts --am tacotron2_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." +paddlespeech tts --am fastspeech2_male --voc pwgan_male --input "你好,欢迎使用百度飞桨深度学习框架!" # mix tts # The `am` must be `fastspeech2_mix`! # The `lang` must be `mix`! 
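The `paddlespeech tts --am fastspeech2_male --voc pwgan_male` CLI test added above also has a Python-API counterpart. A minimal sketch, assuming the documented `TTSExecutor` call signature; the input text and output path are placeholders:

```python
from paddlespeech.cli.tts.infer import TTSExecutor

tts = TTSExecutor()
# Synthesize with the newly released male acoustic model and male vocoder.
tts(text="你好,欢迎使用百度飞桨深度学习框架!",
    am="fastspeech2_male",
    voc="pwgan_male",
    lang="zh",
    output="output_male.wav")
```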
diff --git a/tests/unit/tts/test_pwg.py b/tests/unit/tts/test_pwg.py index 78cb34f25..10c82c9fd 100644 --- a/tests/unit/tts/test_pwg.py +++ b/tests/unit/tts/test_pwg.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle import torch +from paddle.device.cuda import synchronize from parallel_wavegan.layers import residual_block from parallel_wavegan.layers import upsample from parallel_wavegan.models import parallel_wavegan as pwgan @@ -24,7 +25,6 @@ from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator from paddlespeech.t2s.models.parallel_wavegan import ResidualBlock from paddlespeech.t2s.models.parallel_wavegan import ResidualPWGDiscriminator from paddlespeech.t2s.utils.layer_tools import summary -from paddlespeech.t2s.utils.profile import synchronize paddle.set_device("gpu:0") device = torch.device("cuda:0")
-        唤醒
+        语音唤醒
         hey-snips
-        PANN
+        MDTC
-        pann-hey-snips
+        mdtc-hey-snips