pull/2545/head · zhangtianhao · 3 years ago · commit 2ad3a81945

@@ -50,13 +50,20 @@ repos:
entry: bash .pre-commit-hooks/clang-format.hook -i
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.py)$
#- id: copyright_checker
# name: copyright_checker
# entry: python .pre-commit-hooks/copyright-check.hook
# language: system
# files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
# exclude: (?=third_party|pypinyin|speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
- id: cpplint
name: cpplint
description: Static code analysis of C/C++ files
language: python
files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.py)$
entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent
- repo: https://github.com/asottile/reorder_python_imports
rev: v2.4.0
hooks:
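These hooks run automatically on `git commit` once registered. As a minimal sketch (assuming a standard [pre-commit](https://pre-commit.com) setup run from the repository root), they can also be installed and exercised by hand:

```bash
pip install pre-commit
# register the hooks from .pre-commit-config.yaml as a git hook
pre-commit install
# run every configured hook over the whole tree, e.g. before opening a PR
pre-commit run --all-files
```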

@@ -157,11 +157,18 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
- 🧩 *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural Language Processing (NLP) and Computer Vision (CV).
### Recent Update
- 👑 2022.10.11: Add [Wav2vec2ASR](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT in [PaddleSpeech Web Demo](./demos/speech_web).
- 👑 2022.11.18: Add [Whisper CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/pull/2640), supporting multilingual recognition and translation.
- 🔥 2022.11.18: Add [Wav2vec2 CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_ssl), supporting ASR and feature extraction.
- 🎉 2022.11.17: Add [male voice for TTS](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660).
- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech).
- 👑 2022.11.01: Add [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) for [Chinese English mixed TTS](./examples/zh_en_tts/tts3).
- 🔥 2022.10.26: Add [Prosody Prediction](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy) for TTS.
- 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend.
- 👑 2022.10.11: Add [Wav2vec2ASR-en](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and [ERNIE-SAT](https://arxiv.org/abs/2211.03545) in [PaddleSpeech Web Demo](./demos/speech_web).
- ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with ECAPA-TDNN speaker encoder.
- ⚡ 2022.08.25: Release TTS [finetune](./examples/other/tts_finetune/tts3) example.
- 🔥 2022.08.22: Add ERNIE-SAT models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat).
- 🔥 2022.08.22: Add [ERNIE-SAT](https://arxiv.org/abs/2211.03545) models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat), [ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat), [ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat).
- 🔥 2022.08.15: Add [g2pW](https://github.com/GitYCC/g2pW) into TTS Chinese Text Frontend.
- 🔥 2022.08.09: Release [Chinese English mixed TTS](./examples/zh_en_tts/tts3).
- ⚡ 2022.08.03: Add ONNXRuntime infer for TTS CLI.
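Most of the new features above are exposed through the `paddlespeech` command line. A hedged sketch of trying the new Whisper and wav2vec2 support (the subcommands and flags below are assumptions based on the linked demo pages; check those pages for the exact usage):

```bash
# Whisper-based recognition on a local wav file (subcommand assumed from the demo)
paddlespeech whisper --task asr --input ./zh.wav
# wav2vec2-based ASR via the self-supervised learning (ssl) demo
paddlespeech ssl --task asr --lang en --input ./en.wav
```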
@@ -576,7 +583,7 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
</td>
</tr>
<tr>
<td>ERNIE-SAT</td>
<td><a href = "https://arxiv.org/abs/2211.03545">ERNIE-SAT</a></td>
<td>VCTK / AISHELL-3 / ZH_EN</td>
<td>
<a href = "./examples/vctk/ernie_sat">ERNIE-SAT-vctk</a> / <a href = "./examples/aishell3/ernie_sat">ERNIE-SAT-aishell3</a> / <a href = "./examples/aishell3_vctk/ernie_sat">ERNIE-SAT-zh_en</a>
@@ -697,6 +704,31 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
</tbody>
</table>
<a name="KeywordSpotting"></a>
**Keyword Spotting**
<table style="width:100%">
<thead>
<tr>
<th> Task </th>
<th> Dataset </th>
<th> Model Type </th>
<th> Example </th>
</tr>
</thead>
<tbody>
<tr>
<td>Keyword Spotting</td>
<td>hey-snips</td>
<td>MDTC</td>
<td>
<a href = "./examples/hey_snips/kws0">mdtc-hey-snips</a>
</td>
</tr>
</tbody>
</table>
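The MDTC example above can also be tried from the command line; a hedged sketch, assuming the `paddlespeech kws` subcommand defaults to the hey-snips MDTC model (verify against the example page):

```bash
# score a short utterance for the wake word with the default KWS model
paddlespeech kws --input ./audio.wav
```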
<a name="SpeakerVerification"></a>
**Speaker Verification**
@@ -826,7 +858,21 @@ The Text-to-Speech module is originally called [Parakeet](https://github.com/Pad
## Citation
To cite PaddleSpeech for research, please use the following format.
```tex
```text
@InProceedings{pmlr-v162-bai22d,
title = {{A}$^3${T}: Alignment-Aware Acoustic and Text Pretraining for Speech Synthesis and Editing},
author = {Bai, He and Zheng, Renjie and Chen, Junkun and Ma, Mingbo and Li, Xintong and Huang, Liang},
booktitle = {Proceedings of the 39th International Conference on Machine Learning},
pages = {1399--1411},
year = {2022},
volume = {162},
series = {Proceedings of Machine Learning Research},
month = {17--23 Jul},
publisher = {PMLR},
pdf = {https://proceedings.mlr.press/v162/bai22d/bai22d.pdf},
url = {https://proceedings.mlr.press/v162/bai22d.html},
}
@inproceedings{zhang2022paddlespeech,
title = {PaddleSpeech: An Easy-to-Use All-in-One Speech Toolkit},
author = {Hui Zhang, Tian Yuan, Junkun Chen, Xintong Li, Renjie Zheng, Yuxin Huang, Xiaojie Chen, Enlei Gong, Zeyu Chen, Xiaoguang Hu, dianhai yu, Yanjun Ma, Liang Huang},
@@ -923,8 +969,8 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
## Acknowledgement
- Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data.
- Many thanks to [BarryKCL](https://github.com/BarryKCL) improved TTS Chinses frontend based on [G2PW](https://github.com/GitYCC/g2pW).
- Many thanks to [david-95](https://github.com/david-95) for fixing the multi-punctuation bug, contributing multiple programs and data, and adding [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) support to the TTS Chinese Text Frontend.
- Many thanks to [BarryKCL](https://github.com/BarryKCL) for improving the TTS Chinese Frontend based on [G2PW](https://github.com/GitYCC/g2pW).
- Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
- Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.
- Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing a Virtual Uploader (VUP) / Virtual YouTuber (VTuber) with the PaddleSpeech TTS function.

@@ -164,11 +164,18 @@
### Recent Update
- 👑 2022.10.11: Add [Wav2vec2ASR](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT to the [PaddleSpeech Web Demo](./demos/speech_web).
- 👑 2022.11.18: Add [Whisper CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/pull/2640), supporting recognition and translation for multiple languages.
- 🔥 2022.11.18: Add [Wav2vec2 CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_ssl), supporting ASR and feature extraction.
- 🎉 2022.11.17: Add a [high-quality male voice](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660) for TTS.
- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech).
- 👑 2022.11.01: Add an [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) module to [Chinese-English mixed TTS](./examples/zh_en_tts/tts3).
- 🔥 2022.10.26: Add [Prosody Prediction](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy) for TTS.
- 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) support to the TTS Chinese text frontend.
- 👑 2022.10.11: Add [Wav2vec2ASR-en](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and [ERNIE-SAT](https://arxiv.org/abs/2211.03545) to the [PaddleSpeech Web Demo](./demos/speech_web).
- ⚡ 2022.09.09: Add an AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with an ECAPA-TDNN speaker encoder.
- ⚡ 2022.08.25: Release a TTS [finetune](./examples/other/tts_finetune/tts3) example.
- 🔥 2022.08.22: Add ERNIE-SAT models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat), [ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat), [ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat).
- 🔥 2022.08.22: Add [ERNIE-SAT](https://arxiv.org/abs/2211.03545) models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat), [ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat), [ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat).
- 🔥 2022.08.15: Integrate [g2pW](https://github.com/GitYCC/g2pW) into the TTS Chinese text frontend.
- 🔥 2022.08.09: Release [Chinese-English mixed TTS](./examples/zh_en_tts/tts3).
- ⚡ 2022.08.03: Add ONNXRuntime inference to the TTS CLI.
@@ -573,7 +580,7 @@ PaddleSpeech's **speech synthesis** mainly consists of three modules: text frontend, acoustic…
</td>
</tr>
<tr>
<td>ERNIE-SAT</td>
<td><a href = "https://arxiv.org/abs/2211.03545">ERNIE-SAT</a></td>
<td>VCTK / AISHELL-3 / ZH_EN</td>
<td>
<a href = "./examples/vctk/ernie_sat">ERNIE-SAT-vctk</a> / <a href = "./examples/aishell3/ernie_sat">ERNIE-SAT-aishell3</a> / <a href = "./examples/aishell3_vctk/ernie_sat">ERNIE-SAT-zh_en</a>
@@ -694,6 +701,31 @@ PaddleSpeech's **speech synthesis** mainly consists of three modules: text frontend, acoustic…
</table>
<a name="语音唤醒模型"></a>
**Keyword Spotting**
<table style="width:100%">
<thead>
<tr>
<th> Task </th>
<th> Dataset </th>
<th> Model Type </th>
<th> Example </th>
</tr>
</thead>
<tbody>
<tr>
<td>Keyword Spotting</td>
<td>hey-snips</td>
<td>MDTC</td>
<td>
<a href = "./examples/hey_snips/kws0">mdtc-hey-snips</a>
</td>
</tr>
</tbody>
</table>
<a name="声纹识别模型"></a>
**Speaker Verification**
@@ -832,6 +864,20 @@ PaddleSpeech's **speech synthesis** mainly consists of three modules: text frontend, acoustic…
To cite PaddleSpeech for research, please use the following format.
```text
@InProceedings{pmlr-v162-bai22d,
title = {{A}$^3${T}: Alignment-Aware Acoustic and Text Pretraining for Speech Synthesis and Editing},
author = {Bai, He and Zheng, Renjie and Chen, Junkun and Ma, Mingbo and Li, Xintong and Huang, Liang},
booktitle = {Proceedings of the 39th International Conference on Machine Learning},
pages = {1399--1411},
year = {2022},
volume = {162},
series = {Proceedings of Machine Learning Research},
month = {17--23 Jul},
publisher = {PMLR},
pdf = {https://proceedings.mlr.press/v162/bai22d/bai22d.pdf},
url = {https://proceedings.mlr.press/v162/bai22d.html},
}
@inproceedings{zhang2022paddlespeech,
title = {PaddleSpeech: An Easy-to-Use All-in-One Speech Toolkit},
author = {Hui Zhang, Tian Yuan, Junkun Chen, Xintong Li, Renjie Zheng, Yuxin Huang, Xiaojie Chen, Enlei Gong, Zeyu Chen, Xiaoguang Hu, dianhai yu, Yanjun Ma, Liang Huang},
@@ -928,7 +974,7 @@ PaddleSpeech's **speech synthesis** mainly consists of three modules: text frontend, acoustic…
## Acknowledgement
- Many thanks to [HighCWu](https://github.com/HighCWu) for adding the [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
- Many thanks to [david-95](https://github.com/david-95) for fixing the sentence-final multi-punctuation bug and contributing additional programs and data.
- Many thanks to [david-95](https://github.com/david-95) for fixing the TTS sentence-final multi-punctuation bug, contributing additional programs and data, and adding [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) support to the TTS Chinese text frontend.
- Many thanks to [BarryKCL](https://github.com/BarryKCL) for improving the TTS Chinese text frontend based on [G2PW](https://github.com/GitYCC/g2pW).
- Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice, and great help.
- Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementations of ASR for [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files based on PaddleSpeech.

@@ -17,3 +17,5 @@ This directory contains many speech applications in multiple scenarios.
* story talker - book reader based on OCR and TTS
* style_fs2 - multi style control for FastSpeech2 model
* text_to_speech - convert text into speech
* self-supervised pretraining - speech feature extraction and speech recognition based on wav2vec2
* whisper - speech recognition and translation based on the Whisper model

@@ -17,3 +17,5 @@
* story talker - a talking storybook based on OCR and TTS
* personalized TTS - personalized speech synthesis based on the FastSpeech2 model
* text to speech - generate speech audio from given text
* self-supervised pretraining - speech feature extraction and speech recognition based on wav2vec2
* whisper - speech recognition and translation based on the Whisper model

@@ -0,0 +1,100 @@
([简体中文](./README_cn.md)|English)
# ASR Deployment by SpeechX
## Introduction
ASR deployment supports the U2/U2++/DeepSpeech2 ASR models in C++, which is common practice in industrial deployment.
For more information about SpeechX, see [here](../../speechx/README.md).
## Usage
### 1. Environment
* python - 3.7
* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7`
* OS - Ubuntu 16.04.7 LTS
* gcc/g++/gfortran - 8.2.0
* cmake - 3.16.0
For more information, see [here](../../speechx/README.md).
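As a minimal sketch of reproducing this environment (assuming Docker is installed; the image tag is the one listed above), start the container with the repository mounted:

```bash
docker run --rm -it -v "$PWD":/workspace \
    registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7 bash
```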
### 2. Compile SpeechX
Please see [here](../../speechx/README.md).
### 3. Usage
For the U2++ ASR deployment example, see [here](../../speechx/examples/u2pp_ol/wenetspeech/).
First, go to the `speechx/speechx/examples/u2pp_ol/wenetspeech` directory.
- Source path.sh
```bash
source path.sh
```
- Download the model, prepare the test data and the CMVN file
```bash
bash run.sh --stage 0 --stop_stage 1
```
- Decode the WAV file
```bash
# FP32
./local/recognizer.sh
# INT8
./local/recognizer_quant.sh
```
Output:
```bash
I1026 16:13:24.683531 48038 u2_recognizer_main.cc:55] utt: BAC009S0916W0495
I1026 16:13:24.683578 48038 u2_recognizer_main.cc:56] wav dur: 4.17119 sec.
I1026 16:13:24.683595 48038 u2_recognizer_main.cc:64] wav len (sample): 66739
I1026 16:13:25.037652 48038 u2_recognizer_main.cc:87] Pratial result: 3 这令
I1026 16:13:25.043697 48038 u2_recognizer_main.cc:87] Pratial result: 4 这令
I1026 16:13:25.222124 48038 u2_recognizer_main.cc:87] Pratial result: 5 这令被贷款
I1026 16:13:25.228385 48038 u2_recognizer_main.cc:87] Pratial result: 6 这令被贷款
I1026 16:13:25.414669 48038 u2_recognizer_main.cc:87] Pratial result: 7 这令被贷款的员工
I1026 16:13:25.420714 48038 u2_recognizer_main.cc:87] Pratial result: 8 这令被贷款的员工
I1026 16:13:25.608129 48038 u2_recognizer_main.cc:87] Pratial result: 9 这令被贷款的员工们请
I1026 16:13:25.801620 48038 u2_recognizer_main.cc:87] Pratial result: 10 这令被贷款的员工们请食难安
I1026 16:13:25.804101 48038 feature_cache.h:44] set finished
I1026 16:13:25.804128 48038 feature_cache.h:51] compute last feats done.
I1026 16:13:25.948771 48038 u2_recognizer_main.cc:87] Pratial result: 11 这令被贷款的员工们请食难安
I1026 16:13:26.246963 48038 u2_recognizer_main.cc:113] BAC009S0916W0495 这令被贷款的员工们请食难安
```
## Results
> CER is computed on the aishell test set.
> RTF covers both feature extraction and decoding, so it is closer to an end-to-end measurement.
> Machine: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz with avx512_vnni.
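For reference, RTF is the real-time factor, i.e. processing time divided by audio duration, so values below 1 are faster than real time:

```
RTF = processing_time / audio_duration
e.g. at RTF 0.315, the 4.17 s utterance above decodes in about 0.315 x 4.17 ≈ 1.31 s
```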
### FP32
```
Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294
Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```
```
RTF is: 0.315337
```
### INT8
```
Overall -> 5.83 % N=104765 C=98943 S=5675 D=147 I=286
Mandarin -> 5.83 % N=104762 C=98943 S=5672 D=147 I=286
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```
```
RTF is: 0.269674
```
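Comparing the two tables: quantization raises the overall CER from 5.75 % to 5.83 % (+0.08 absolute) while lowering RTF from 0.315 to 0.270, roughly a 14 % decoding speedup.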

@@ -0,0 +1,96 @@
([简体中文](./README_cn.md)|English)
# ASR Deployment Based on SpeechX
## Introduction
Supports C++ deployment of the U2/U2++/DeepSpeech2 ASR models, which is common practice in industrial deployment.
For more information about SpeechX, see the [documentation](../../speechx/README.md).
## Usage
### 1. Environment
* python - 3.7
* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7`
* OS - Ubuntu 16.04.7 LTS
* gcc/g++/gfortran - 8.2.0
* cmake - 3.16.0
For more information, see the [documentation](../../speechx/README.md).
### 2. Compile SpeechX
For more information, see the [documentation](../../speechx/README.md).
### 3. Example
For the U2++ recognition deployment example, see [here](../../speechx/examples/u2pp_ol/wenetspeech/).
The following steps are run from the `speechx/speechx/examples/u2pp_ol/wenetspeech` directory.
- Source path.sh
```bash
source path.sh
```
- Download the model, prepare the test data and the CMVN file
```bash
bash run.sh --stage 0 --stop_stage 1
```
- Decode
```bash
# FP32
./local/recognizer.sh
# INT8
./local/recognizer_quant.sh
```
Output:
```bash
I1026 16:13:24.683531 48038 u2_recognizer_main.cc:55] utt: BAC009S0916W0495
I1026 16:13:24.683578 48038 u2_recognizer_main.cc:56] wav dur: 4.17119 sec.
I1026 16:13:24.683595 48038 u2_recognizer_main.cc:64] wav len (sample): 66739
I1026 16:13:25.037652 48038 u2_recognizer_main.cc:87] Pratial result: 3 这令
I1026 16:13:25.043697 48038 u2_recognizer_main.cc:87] Pratial result: 4 这令
I1026 16:13:25.222124 48038 u2_recognizer_main.cc:87] Pratial result: 5 这令被贷款
I1026 16:13:25.228385 48038 u2_recognizer_main.cc:87] Pratial result: 6 这令被贷款
I1026 16:13:25.414669 48038 u2_recognizer_main.cc:87] Pratial result: 7 这令被贷款的员工
I1026 16:13:25.420714 48038 u2_recognizer_main.cc:87] Pratial result: 8 这令被贷款的员工
I1026 16:13:25.608129 48038 u2_recognizer_main.cc:87] Pratial result: 9 这令被贷款的员工们请
I1026 16:13:25.801620 48038 u2_recognizer_main.cc:87] Pratial result: 10 这令被贷款的员工们请食难安
I1026 16:13:25.804101 48038 feature_cache.h:44] set finished
I1026 16:13:25.804128 48038 feature_cache.h:51] compute last feats done.
I1026 16:13:25.948771 48038 u2_recognizer_main.cc:87] Pratial result: 11 这令被贷款的员工们请食难安
I1026 16:13:26.246963 48038 u2_recognizer_main.cc:113] BAC009S0916W0495 这令被贷款的员工们请食难安
```
## Results
> CER is computed on the aishell test set.
> RTF covers both feature extraction and decoding.
> Test machine: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz with avx512_vnni.
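RTF here is again the real-time factor:

```
RTF = processing_time / audio_duration   (RTF < 1 means faster than real time)
```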
### FP32
```
Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294
Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```
```
RTF is: 0.315337
```
### INT8
```
Overall -> 5.87 % N=104765 C=98909 S=5711 D=145 I=289
Mandarin -> 5.86 % N=104762 C=98909 S=5708 D=145 I=289
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

[Image diffs omitted: four images changed, with before/after sizes of 29 KiB, 50 KiB, 33 KiB, and 81 KiB.]

Some files were not shown because too many files have changed in this diff.