diff --git a/.flake8 b/.flake8
index 44685f23a..6b50de7ed 100644
--- a/.flake8
+++ b/.flake8
@@ -12,6 +12,8 @@ exclude =
.git,
# python cache
__pycache__,
+ # third party
+ utils/compute-wer.py,
third_party/,
# Provide a comma-separate list of glob patterns to include for checks.
filename =
diff --git a/README.md b/README.md
index 5093dbd67..9791b895f 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,10 @@
([简体中文](./README_cn.md)|English)
+
+
-
-
-------------------------------------------------------------------------------------
-
@@ -28,6 +19,20 @@
+
+
+
**PaddleSpeech** is an open-source toolkit on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform for a variety of critical tasks in speech and audio, with state-of-the-art and influential models.
@@ -142,26 +147,6 @@ For more synthesized audios, please refer to [PaddleSpeech Text-to-Speech sample
-### ⭐ Examples
-- **[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo): Use PaddleSpeech TTS to generate virtual human voice.**
-
-
-
-- [PaddleSpeech Demo Video](https://paddlespeech.readthedocs.io/en/latest/demo_video.html)
-
-- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): Use PaddleSpeech TTS and ASR to clone voice from videos.**
-
-
-
-
-
-### 🔥 Hot Activities
-
-- 2021.12.21~12.24
-
- 4 Days Live Courses: Depth interpretation of PaddleSpeech!
-
- **Courses videos and related materials: https://aistudio.baidu.com/aistudio/education/group/info/25130**
### Features
@@ -174,11 +159,22 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
- 🔬 *Integration of mainstream models and datasets*: the toolkit implements modules that participate in the whole pipeline of the speech tasks, and uses mainstream datasets like LibriSpeech, LJSpeech, AIShell, CSMSC, etc. See also [model list](#model-list) for more details.
- 🧩 *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV).
-### Recent Update
+### 🔥 Hot Activities
+
+- 2021.12.21~12.24
+
  4-Day Live Courses: In-depth interpretation of PaddleSpeech!
+
+  **Course videos and related materials: https://aistudio.baidu.com/aistudio/education/group/info/25130**
+
+
+### Recent Update
+
+- 👏🏻 2022.04.28: PaddleSpeech Streaming Server is available for Automatic Speech Recognition and Text-to-Speech.
- 👏🏻 2022.03.28: PaddleSpeech Server is available for Audio Classification, Automatic Speech Recognition and Text-to-Speech.
- 👏🏻 2022.03.28: PaddleSpeech CLI is available for Speaker Verification.
- 🤗 2021.12.14: Our PaddleSpeech [ASR](https://huggingface.co/spaces/KPatrick/PaddleSpeechASR) and [TTS](https://huggingface.co/spaces/KPatrick/PaddleSpeechTTS) Demos on Hugging Face Spaces are available!
@@ -196,6 +192,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
We strongly recommend that users install PaddleSpeech on **Linux** with *python>=3.7*.
Up to now, **Linux** supports CLI for all of our tasks, while **Mac OSX** and **Windows** only support the PaddleSpeech CLI for Audio Classification, Speech-to-Text and Text-to-Speech. To install `PaddleSpeech`, please see [installation](./docs/source/install.md).
+
## Quick Start
@@ -238,7 +235,7 @@ paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" --ou
**Batch Process**
```
echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
-```
+```
**Shell Pipeline**
- ASR + Punctuation Restoration
@@ -257,16 +254,19 @@ If you want to try more functions like training and tuning, please have a look a
Developers can try our speech server with the [PaddleSpeech Server Command Line](./paddlespeech/server/README.md).
**Start server**
+
```shell
paddlespeech_server start --config_file ./paddlespeech/server/conf/application.yaml
```
**Access Speech Recognition Services**
+
```shell
paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```
**Access Text to Speech Services**
+
```shell
paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
```
@@ -280,6 +280,37 @@ paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
For more information about server command lines, please see: [speech server demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/speech_server)
+
+## Quick Start Streaming Server
+
+Developers can try the [streaming ASR](./demos/streaming_asr_server/README.md) and [streaming TTS](./demos/streaming_tts_server/README.md) servers.
+
+**Start Streaming Speech Recognition Server**
+
+```
+paddlespeech_server start --config_file ./demos/streaming_asr_server/conf/application.yaml
+```
+
+**Access Streaming Speech Recognition Services**
+
+```
+paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
+```
+
+**Start Streaming Text to Speech Server**
+
+```
+paddlespeech_server start --config_file ./demos/streaming_tts_server/conf/tts_online_application.yaml
+```
+
+**Access Streaming Text to Speech Services**
+
+```
+paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
+```
+
+For more information, please see [streaming ASR](./demos/streaming_asr_server/README.md) and [streaming TTS](./demos/streaming_tts_server/README.md).
+
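The shell client above can also be driven from Python. Below is a minimal sketch using the same `ASROnlineClientExecutor` API shown in the streaming ASR demo README; it assumes the streaming server started above is listening on 127.0.0.1:8090 and that `input_16k.wav` is a local 16 kHz WAV file.

```python
from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor

# Query the streaming ASR server started above (assumed to be listening on 127.0.0.1:8090).
asrclient_executor = ASROnlineClientExecutor()
res = asrclient_executor(
    input="./input_16k.wav",  # local 16 kHz WAV file (assumption)
    server_ip="127.0.0.1",
    port=8090,
    sample_rate=16000,
    lang="zh_cn",
    audio_format="wav")
print(res)
```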
## Model List
@@ -589,15 +620,31 @@ Normally, [Speech SoTA](https://paperswithcode.com/area/speech), [Audio SoTA](ht
The Text-to-Speech module is originally called [Parakeet](https://github.com/PaddlePaddle/Parakeet), and now merged with this repository. If you are interested in academic research about this task, please see [TTS research overview](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/docs/source/tts#overview). Also, [this document](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/tts/models_introduction.md) is a good guideline for the pipeline components.
+
+## ⭐ Examples
+- **[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo): Use PaddleSpeech TTS to generate virtual human voice.**
+
+
+
+- [PaddleSpeech Demo Video](https://paddlespeech.readthedocs.io/en/latest/demo_video.html)
+
+- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): Use PaddleSpeech TTS and ASR to clone voice from videos.**
+
+
+
+
+
+
## Citation
To cite PaddleSpeech for research, please use the following format.
```tex
-@misc{ppspeech2021,
-title={PaddleSpeech, a toolkit for audio processing based on PaddlePaddle.},
-author={PaddlePaddle Authors},
-howpublished = {\url{https://github.com/PaddlePaddle/PaddleSpeech}},
-year={2021}
+@inproceedings{zhang2022paddlespeech,
+ title = {PaddleSpeech: An Easy-to-Use All-in-One Speech Toolkit},
+  author = {Hui Zhang and Tian Yuan and Junkun Chen and Xintong Li and Renjie Zheng and Yuxin Huang and Xiaojie Chen and Enlei Gong and Zeyu Chen and Xiaoguang Hu and Dianhai Yu and Yanjun Ma and Liang Huang},
+ booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Demonstrations},
+ year = {2022},
+ publisher = {Association for Computational Linguistics},
}
@inproceedings{zheng2021fused,
@@ -654,7 +701,6 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
## Acknowledgement
-
- Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
- Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.
- Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing Virtual Uploader(VUP)/Virtual YouTuber(VTuber) with PaddleSpeech TTS function.
diff --git a/README_cn.md b/README_cn.md
index 5dab7fa0c..497863dbc 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -2,26 +2,45 @@
-
-------------------------------------------------------------------------------------
-
+
+
+
+
+
+
+
+------------------------------------------------------------------------------------
+
+
+
+
+
+
**PaddleSpeech** 是基于飞桨 [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) 的语音方向的开源模型库,用于语音和音频中的各种关键任务的开发,包含大量基于深度学习前沿和有影响力的模型,一些典型的应用示例如下:
##### 语音识别
@@ -57,7 +78,6 @@ from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readme
我认为跑步最重要的就是给我带来了身体健康。 |
-
@@ -143,19 +163,6 @@ from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readme
-### ⭐ 应用案例
-- **[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo): 使用 PaddleSpeech 的语音合成模块生成虚拟人的声音。**
-
-
-
-- [PaddleSpeech 示例视频](https://paddlespeech.readthedocs.io/en/latest/demo_video.html)
-
-
-- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): 使用 PaddleSpeech 的语音合成和语音识别从视频中克隆人声。**
-
-
-
-
### 🔥 热门活动
@@ -164,27 +171,32 @@ from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readme
4 日直播课: 深度解读 PaddleSpeech 语音技术!
**直播回放与课件资料: https://aistudio.baidu.com/aistudio/education/group/info/25130**
-### 特性
-本项目采用了易用、高效、灵活以及可扩展的实现,旨在为工业应用、学术研究提供更好的支持,实现的功能包含训练、推断以及测试模块,以及部署过程,主要包括
-- 📦 **易用性**: 安装门槛低,可使用 [CLI](#quick-start) 快速开始。
-- 🏆 **对标 SoTA**: 提供了高速、轻量级模型,且借鉴了最前沿的技术。
-- 💯 **基于规则的中文前端**: 我们的前端包含文本正则化和字音转换(G2P)。此外,我们使用自定义语言规则来适应中文语境。
-- **多种工业界以及学术界主流功能支持**:
- - 🛎️ 典型音频任务: 本工具包提供了音频任务如音频分类、语音翻译、自动语音识别、文本转语音、语音合成等任务的实现。
- - 🔬 主流模型及数据集: 本工具包实现了参与整条语音任务流水线的各个模块,并且采用了主流数据集如 LibriSpeech、LJSpeech、AIShell、CSMSC,详情请见 [模型列表](#model-list)。
- - 🧩 级联模型应用: 作为传统语音任务的扩展,我们结合了自然语言处理、计算机视觉等任务,实现更接近实际需求的产业级应用。
### 近期更新
+- 👏🏻 2022.04.28: PaddleSpeech Streaming Server 上线! 覆盖了语音识别和语音合成。
- 👏🏻 2022.03.28: PaddleSpeech Server 上线! 覆盖了声音分类、语音识别、以及语音合成。
- 👏🏻 2022.03.28: PaddleSpeech CLI 上线声纹验证。
- 🤗 2021.12.14: Our PaddleSpeech [ASR](https://huggingface.co/spaces/KPatrick/PaddleSpeechASR) and [TTS](https://huggingface.co/spaces/KPatrick/PaddleSpeechTTS) Demos on Hugging Face Spaces are available!
- 👏🏻 2021.12.10: PaddleSpeech CLI 上线!覆盖了声音分类、语音识别、语音翻译(英译中)以及语音合成。
+
+### 特性
+
+本项目采用了易用、高效、灵活以及可扩展的实现,旨在为工业应用、学术研究提供更好的支持,实现的功能包含训练、推断以及测试模块,以及部署过程,主要包括
+- 📦 **易用性**: 安装门槛低,可使用 [CLI](#quick-start) 快速开始。
+- 🏆 **对标 SoTA**: 提供了高速、轻量级模型,且借鉴了最前沿的技术。
+- 💯 **基于规则的中文前端**: 我们的前端包含文本正则化和字音转换(G2P)。此外,我们使用自定义语言规则来适应中文语境。
+- **多种工业界以及学术界主流功能支持**:
+ - 🛎️ 典型音频任务: 本工具包提供了音频任务如音频分类、语音翻译、自动语音识别、文本转语音、语音合成等任务的实现。
+ - 🔬 主流模型及数据集: 本工具包实现了参与整条语音任务流水线的各个模块,并且采用了主流数据集如 LibriSpeech、LJSpeech、AIShell、CSMSC,详情请见 [模型列表](#model-list)。
+ - 🧩 级联模型应用: 作为传统语音任务的扩展,我们结合了自然语言处理、计算机视觉等任务,实现更接近实际需求的产业级应用。
+
+
### 技术交流群
微信扫描二维码(好友申请通过后回复【语音】)加入官方交流群,获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
@@ -192,11 +204,13 @@ from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readme
+
## 安装
我们强烈建议用户在 **Linux** 环境下,*3.7* 以上版本的 *python* 上安装 PaddleSpeech。
目前为止,**Linux** 支持声音分类、语音识别、语音合成和语音翻译四种功能,**Mac OSX、 Windows** 下暂不支持语音翻译功能。 想了解具体安装细节,可以参考[安装文档](./docs/source/install_cn.md)。
+
## 快速开始
安装完成后,开发者可以通过命令行快速开始,改变 `--input` 可以尝试用自己的音频或文本测试。
@@ -232,7 +246,7 @@ paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!
**批处理**
```
echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
-```
+```
**Shell管道**
ASR + Punc:
@@ -269,6 +283,38 @@ paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
更多服务相关的命令行使用信息,请参考 [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/speech_server)
+
+## 快速使用流式服务
+
+开发者可以尝试[流式 ASR](./demos/streaming_asr_server/README.md)和[流式 TTS](./demos/streaming_tts_server/README.md)服务。
+
+**启动流式ASR服务**
+
+```
+paddlespeech_server start --config_file ./demos/streaming_asr_server/conf/application.yaml
+```
+
+**访问流式ASR服务**
+
+```
+paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
+```
+
+**启动流式TTS服务**
+
+```
+paddlespeech_server start --config_file ./demos/streaming_tts_server/conf/tts_online_application.yaml
+```
+
+**访问流式TTS服务**
+
+```
+paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
+```
+
+更多信息参看: [流式 ASR](./demos/streaming_asr_server/README.md) 和 [流式 TTS](./demos/streaming_tts_server/README.md)
+
+
## 模型列表
PaddleSpeech 支持很多主流的模型,并提供了预训练模型,详情请见[模型列表](./docs/source/released_model.md)。
@@ -582,15 +628,31 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
语音合成模块最初被称为 [Parakeet](https://github.com/PaddlePaddle/Parakeet),现在与此仓库合并。如果您对该任务的学术研究感兴趣,请参阅 [TTS 研究概述](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/docs/source/tts#overview)。此外,[模型介绍](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/tts/models_introduction.md) 是了解语音合成流程的一个很好的指南。
+## ⭐ 应用案例
+- **[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo): 使用 PaddleSpeech 的语音合成模块生成虚拟人的声音。**
+
+
+
+- [PaddleSpeech 示例视频](https://paddlespeech.readthedocs.io/en/latest/demo_video.html)
+
+
+- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): 使用 PaddleSpeech 的语音合成和语音识别从视频中克隆人声。**
+
+
+
+
+
+
## 引用
要引用 PaddleSpeech 进行研究,请使用以下格式进行引用。
```text
-@misc{ppspeech2021,
-title={PaddleSpeech, a toolkit for audio processing based on PaddlePaddle.},
-author={PaddlePaddle Authors},
-howpublished = {\url{https://github.com/PaddlePaddle/PaddleSpeech}},
-year={2021}
+@inproceedings{zhang2022paddlespeech,
+ title = {PaddleSpeech: An Easy-to-Use All-in-One Speech Toolkit},
+  author = {Hui Zhang and Tian Yuan and Junkun Chen and Xintong Li and Renjie Zheng and Yuxin Huang and Xiaojie Chen and Enlei Gong and Zeyu Chen and Xiaoguang Hu and Dianhai Yu and Yanjun Ma and Liang Huang},
+ booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Demonstrations},
+ year = {2022},
+ publisher = {Association for Computational Linguistics},
}
@inproceedings{zheng2021fused,
@@ -657,6 +719,7 @@ year={2021}
- 非常感谢 [jerryuhoo](https://github.com/jerryuhoo)/[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk) 基于 PaddleSpeech 的 TTS GUI 界面和基于 ASR 制作数据集的相关代码。
+
此外,PaddleSpeech 依赖于许多开源存储库。有关更多信息,请参阅 [references](./docs/source/reference.md)。
## License
diff --git a/paddleaudio/.gitignore b/audio/.gitignore
similarity index 100%
rename from paddleaudio/.gitignore
rename to audio/.gitignore
diff --git a/paddleaudio/CHANGELOG.md b/audio/CHANGELOG.md
similarity index 100%
rename from paddleaudio/CHANGELOG.md
rename to audio/CHANGELOG.md
diff --git a/paddleaudio/README.md b/audio/README.md
similarity index 100%
rename from paddleaudio/README.md
rename to audio/README.md
diff --git a/paddleaudio/docs/Makefile b/audio/docs/Makefile
similarity index 100%
rename from paddleaudio/docs/Makefile
rename to audio/docs/Makefile
diff --git a/paddleaudio/docs/README.md b/audio/docs/README.md
similarity index 100%
rename from paddleaudio/docs/README.md
rename to audio/docs/README.md
diff --git a/paddleaudio/docs/images/paddle.png b/audio/docs/images/paddle.png
similarity index 100%
rename from paddleaudio/docs/images/paddle.png
rename to audio/docs/images/paddle.png
diff --git a/paddleaudio/docs/make.bat b/audio/docs/make.bat
similarity index 100%
rename from paddleaudio/docs/make.bat
rename to audio/docs/make.bat
diff --git a/paddleaudio/docs/source/_static/custom.css b/audio/docs/source/_static/custom.css
similarity index 100%
rename from paddleaudio/docs/source/_static/custom.css
rename to audio/docs/source/_static/custom.css
diff --git a/paddleaudio/docs/source/_templates/module.rst_t b/audio/docs/source/_templates/module.rst_t
similarity index 100%
rename from paddleaudio/docs/source/_templates/module.rst_t
rename to audio/docs/source/_templates/module.rst_t
diff --git a/paddleaudio/docs/source/_templates/package.rst_t b/audio/docs/source/_templates/package.rst_t
similarity index 100%
rename from paddleaudio/docs/source/_templates/package.rst_t
rename to audio/docs/source/_templates/package.rst_t
diff --git a/paddleaudio/docs/source/_templates/toc.rst_t b/audio/docs/source/_templates/toc.rst_t
similarity index 100%
rename from paddleaudio/docs/source/_templates/toc.rst_t
rename to audio/docs/source/_templates/toc.rst_t
diff --git a/paddleaudio/docs/source/conf.py b/audio/docs/source/conf.py
similarity index 100%
rename from paddleaudio/docs/source/conf.py
rename to audio/docs/source/conf.py
diff --git a/paddleaudio/docs/source/index.rst b/audio/docs/source/index.rst
similarity index 100%
rename from paddleaudio/docs/source/index.rst
rename to audio/docs/source/index.rst
diff --git a/paddleaudio/paddleaudio/__init__.py b/audio/paddleaudio/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/__init__.py
rename to audio/paddleaudio/__init__.py
diff --git a/paddleaudio/paddleaudio/backends/__init__.py b/audio/paddleaudio/backends/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/backends/__init__.py
rename to audio/paddleaudio/backends/__init__.py
diff --git a/paddleaudio/paddleaudio/backends/soundfile_backend.py b/audio/paddleaudio/backends/soundfile_backend.py
similarity index 100%
rename from paddleaudio/paddleaudio/backends/soundfile_backend.py
rename to audio/paddleaudio/backends/soundfile_backend.py
diff --git a/paddleaudio/paddleaudio/backends/sox_backend.py b/audio/paddleaudio/backends/sox_backend.py
similarity index 100%
rename from paddleaudio/paddleaudio/backends/sox_backend.py
rename to audio/paddleaudio/backends/sox_backend.py
diff --git a/paddleaudio/paddleaudio/compliance/__init__.py b/audio/paddleaudio/compliance/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/compliance/__init__.py
rename to audio/paddleaudio/compliance/__init__.py
diff --git a/paddleaudio/paddleaudio/compliance/kaldi.py b/audio/paddleaudio/compliance/kaldi.py
similarity index 100%
rename from paddleaudio/paddleaudio/compliance/kaldi.py
rename to audio/paddleaudio/compliance/kaldi.py
diff --git a/paddleaudio/paddleaudio/compliance/librosa.py b/audio/paddleaudio/compliance/librosa.py
similarity index 100%
rename from paddleaudio/paddleaudio/compliance/librosa.py
rename to audio/paddleaudio/compliance/librosa.py
diff --git a/paddleaudio/paddleaudio/datasets/__init__.py b/audio/paddleaudio/datasets/__init__.py
similarity index 96%
rename from paddleaudio/paddleaudio/datasets/__init__.py
rename to audio/paddleaudio/datasets/__init__.py
index ebd4af984..f95fad305 100644
--- a/paddleaudio/paddleaudio/datasets/__init__.py
+++ b/audio/paddleaudio/datasets/__init__.py
@@ -13,6 +13,7 @@
# limitations under the License.
from .esc50 import ESC50
from .gtzan import GTZAN
+from .hey_snips import HeySnips
from .rirs_noises import OpenRIRNoise
from .tess import TESS
from .urban_sound import UrbanSound8K
diff --git a/paddleaudio/paddleaudio/datasets/dataset.py b/audio/paddleaudio/datasets/dataset.py
similarity index 76%
rename from paddleaudio/paddleaudio/datasets/dataset.py
rename to audio/paddleaudio/datasets/dataset.py
index 06e2df6d0..488187a69 100644
--- a/paddleaudio/paddleaudio/datasets/dataset.py
+++ b/audio/paddleaudio/datasets/dataset.py
@@ -17,6 +17,8 @@ import numpy as np
import paddle
from ..backends import load as load_audio
+from ..compliance.kaldi import fbank as kaldi_fbank
+from ..compliance.kaldi import mfcc as kaldi_mfcc
from ..compliance.librosa import melspectrogram
from ..compliance.librosa import mfcc
@@ -24,6 +26,8 @@ feat_funcs = {
'raw': None,
'melspectrogram': melspectrogram,
'mfcc': mfcc,
+ 'kaldi_fbank': kaldi_fbank,
+ 'kaldi_mfcc': kaldi_mfcc,
}
@@ -73,16 +77,24 @@ class AudioClassificationDataset(paddle.io.Dataset):
feat_func = feat_funcs[self.feat_type]
record = {}
- record['feat'] = feat_func(
- waveform, sample_rate,
- **self.feat_config) if feat_func else waveform
+ if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
+ waveform = paddle.to_tensor(waveform).unsqueeze(0) # (C, T)
+ record['feat'] = feat_func(
+ waveform=waveform, sr=self.sample_rate, **self.feat_config)
+ else:
+ record['feat'] = feat_func(
+ waveform, sample_rate,
+ **self.feat_config) if feat_func else waveform
record['label'] = label
return record
def __getitem__(self, idx):
record = self._convert_to_record(idx)
- return np.array(record['feat']).transpose(), np.array(
- record['label'], dtype=np.int64)
+ if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
+ return self.keys[idx], record['feat'], record['label']
+ else:
+ return np.array(record['feat']).transpose(), np.array(
+ record['label'], dtype=np.int64)
def __len__(self):
return len(self.files)
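For context, here is a minimal sketch of what the new `kaldi_fbank` branch computes, outside the dataset class. The wav path is a placeholder, `load_audio` is assumed to return the waveform together with its sample rate as in the dataset code, and any extra keyword arguments would normally come from `feat_config`.

```python
import paddle
from paddleaudio.backends import load as load_audio
from paddleaudio.compliance.kaldi import fbank as kaldi_fbank

# Placeholder wav path; load_audio is assumed to return (waveform, sample_rate).
waveform, sample_rate = load_audio('./zh.wav')
# Kaldi-style features expect a (C, T) tensor, hence the unsqueeze, mirroring the hunk above.
waveform = paddle.to_tensor(waveform).unsqueeze(0)
feat = kaldi_fbank(waveform=waveform, sr=sample_rate)
print(feat.shape)
```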
diff --git a/paddleaudio/paddleaudio/datasets/esc50.py b/audio/paddleaudio/datasets/esc50.py
similarity index 100%
rename from paddleaudio/paddleaudio/datasets/esc50.py
rename to audio/paddleaudio/datasets/esc50.py
diff --git a/paddleaudio/paddleaudio/datasets/gtzan.py b/audio/paddleaudio/datasets/gtzan.py
similarity index 100%
rename from paddleaudio/paddleaudio/datasets/gtzan.py
rename to audio/paddleaudio/datasets/gtzan.py
diff --git a/audio/paddleaudio/datasets/hey_snips.py b/audio/paddleaudio/datasets/hey_snips.py
new file mode 100644
index 000000000..7a67b843b
--- /dev/null
+++ b/audio/paddleaudio/datasets/hey_snips.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import collections
+import json
+import os
+from typing import List
+from typing import Tuple
+
+from .dataset import AudioClassificationDataset
+
+__all__ = ['HeySnips']
+
+
+class HeySnips(AudioClassificationDataset):
+ meta_info = collections.namedtuple('META_INFO',
+ ('key', 'label', 'duration', 'wav'))
+
+ def __init__(self,
+ data_dir: os.PathLike,
+ mode: str='train',
+ feat_type: str='kaldi_fbank',
+ sample_rate: int=16000,
+ **kwargs):
+ self.data_dir = data_dir
+ files, labels = self._get_data(mode)
+ super(HeySnips, self).__init__(
+ files=files,
+ labels=labels,
+ feat_type=feat_type,
+ sample_rate=sample_rate,
+ **kwargs)
+
+ def _get_meta_info(self, mode) -> List[collections.namedtuple]:
+ ret = []
+ with open(os.path.join(self.data_dir, '{}.json'.format(mode)),
+ 'r') as f:
+ data = json.load(f)
+ for item in data:
+ sample = collections.OrderedDict()
+ if item['duration'] > 0:
+ sample['key'] = item['id']
+ sample['label'] = 0 if item['is_hotword'] == 1 else -1
+ sample['duration'] = item['duration']
+ sample['wav'] = os.path.join(self.data_dir,
+ item['audio_file_path'])
+ ret.append(self.meta_info(*sample.values()))
+ return ret
+
+ def _get_data(self, mode: str) -> Tuple[List[str], List[int]]:
+ meta_info = self._get_meta_info(mode)
+
+ files = []
+ labels = []
+ self.keys = []
+ self.durations = []
+ for sample in meta_info:
+ key, target, duration, wav = sample
+ files.append(wav)
+ labels.append(int(target))
+ self.keys.append(key)
+ self.durations.append(float(duration))
+
+ return files, labels
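A hypothetical usage sketch of the new dataset class; the data directory is a placeholder and is assumed to contain the `train.json` metadata and the referenced audio files of the HeySnips corpus.

```python
from paddleaudio.datasets import HeySnips

# Placeholder data_dir pointing at an extracted HeySnips corpus.
train_ds = HeySnips(
    data_dir='/path/to/hey_snips',
    mode='train',
    feat_type='kaldi_fbank',
    sample_rate=16000)

# With 'kaldi_fbank' / 'kaldi_mfcc' feature types, each sample is (key, feature, label);
# the label is 0 for the hotword and -1 otherwise.
key, feat, label = train_ds[0]
print(len(train_ds), key, label)
```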
diff --git a/paddleaudio/paddleaudio/datasets/rirs_noises.py b/audio/paddleaudio/datasets/rirs_noises.py
similarity index 100%
rename from paddleaudio/paddleaudio/datasets/rirs_noises.py
rename to audio/paddleaudio/datasets/rirs_noises.py
diff --git a/paddleaudio/paddleaudio/datasets/tess.py b/audio/paddleaudio/datasets/tess.py
similarity index 100%
rename from paddleaudio/paddleaudio/datasets/tess.py
rename to audio/paddleaudio/datasets/tess.py
diff --git a/paddleaudio/paddleaudio/datasets/urban_sound.py b/audio/paddleaudio/datasets/urban_sound.py
similarity index 100%
rename from paddleaudio/paddleaudio/datasets/urban_sound.py
rename to audio/paddleaudio/datasets/urban_sound.py
diff --git a/paddleaudio/paddleaudio/datasets/voxceleb.py b/audio/paddleaudio/datasets/voxceleb.py
similarity index 100%
rename from paddleaudio/paddleaudio/datasets/voxceleb.py
rename to audio/paddleaudio/datasets/voxceleb.py
diff --git a/paddleaudio/paddleaudio/features/__init__.py b/audio/paddleaudio/features/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/features/__init__.py
rename to audio/paddleaudio/features/__init__.py
diff --git a/paddleaudio/paddleaudio/features/layers.py b/audio/paddleaudio/features/layers.py
similarity index 100%
rename from paddleaudio/paddleaudio/features/layers.py
rename to audio/paddleaudio/features/layers.py
diff --git a/paddleaudio/paddleaudio/functional/__init__.py b/audio/paddleaudio/functional/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/functional/__init__.py
rename to audio/paddleaudio/functional/__init__.py
diff --git a/paddleaudio/paddleaudio/functional/functional.py b/audio/paddleaudio/functional/functional.py
similarity index 100%
rename from paddleaudio/paddleaudio/functional/functional.py
rename to audio/paddleaudio/functional/functional.py
diff --git a/paddleaudio/paddleaudio/functional/window.py b/audio/paddleaudio/functional/window.py
similarity index 100%
rename from paddleaudio/paddleaudio/functional/window.py
rename to audio/paddleaudio/functional/window.py
diff --git a/paddleaudio/paddleaudio/io/__init__.py b/audio/paddleaudio/io/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/io/__init__.py
rename to audio/paddleaudio/io/__init__.py
diff --git a/paddleaudio/paddleaudio/metric/__init__.py b/audio/paddleaudio/metric/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/metric/__init__.py
rename to audio/paddleaudio/metric/__init__.py
diff --git a/paddleaudio/paddleaudio/metric/dtw.py b/audio/paddleaudio/metric/dtw.py
similarity index 100%
rename from paddleaudio/paddleaudio/metric/dtw.py
rename to audio/paddleaudio/metric/dtw.py
diff --git a/paddleaudio/paddleaudio/metric/eer.py b/audio/paddleaudio/metric/eer.py
similarity index 100%
rename from paddleaudio/paddleaudio/metric/eer.py
rename to audio/paddleaudio/metric/eer.py
diff --git a/paddleaudio/paddleaudio/sox_effects/__init__.py b/audio/paddleaudio/sox_effects/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/sox_effects/__init__.py
rename to audio/paddleaudio/sox_effects/__init__.py
diff --git a/paddleaudio/paddleaudio/utils/__init__.py b/audio/paddleaudio/utils/__init__.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/__init__.py
rename to audio/paddleaudio/utils/__init__.py
diff --git a/paddleaudio/paddleaudio/utils/download.py b/audio/paddleaudio/utils/download.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/download.py
rename to audio/paddleaudio/utils/download.py
diff --git a/paddleaudio/paddleaudio/utils/env.py b/audio/paddleaudio/utils/env.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/env.py
rename to audio/paddleaudio/utils/env.py
diff --git a/paddleaudio/paddleaudio/utils/error.py b/audio/paddleaudio/utils/error.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/error.py
rename to audio/paddleaudio/utils/error.py
diff --git a/paddleaudio/paddleaudio/utils/log.py b/audio/paddleaudio/utils/log.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/log.py
rename to audio/paddleaudio/utils/log.py
diff --git a/paddleaudio/paddleaudio/utils/numeric.py b/audio/paddleaudio/utils/numeric.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/numeric.py
rename to audio/paddleaudio/utils/numeric.py
diff --git a/paddleaudio/paddleaudio/utils/time.py b/audio/paddleaudio/utils/time.py
similarity index 100%
rename from paddleaudio/paddleaudio/utils/time.py
rename to audio/paddleaudio/utils/time.py
diff --git a/paddleaudio/setup.py b/audio/setup.py
similarity index 99%
rename from paddleaudio/setup.py
rename to audio/setup.py
index aac389302..ffee6f9d2 100644
--- a/paddleaudio/setup.py
+++ b/audio/setup.py
@@ -19,7 +19,7 @@ from setuptools.command.install import install
from setuptools.command.test import test
# set the version here
-VERSION = '0.2.1'
+VERSION = '1.0.0a'
# Inspired by the example at https://pytest.org/latest/goodpractises.html
diff --git a/paddleaudio/tests/.gitkeep b/audio/tests/.gitkeep
similarity index 100%
rename from paddleaudio/tests/.gitkeep
rename to audio/tests/.gitkeep
diff --git a/paddleaudio/tests/backends/__init__.py b/audio/tests/backends/__init__.py
similarity index 100%
rename from paddleaudio/tests/backends/__init__.py
rename to audio/tests/backends/__init__.py
diff --git a/paddleaudio/tests/backends/base.py b/audio/tests/backends/base.py
similarity index 100%
rename from paddleaudio/tests/backends/base.py
rename to audio/tests/backends/base.py
diff --git a/paddleaudio/tests/backends/soundfile/__init__.py b/audio/tests/backends/soundfile/__init__.py
similarity index 100%
rename from paddleaudio/tests/backends/soundfile/__init__.py
rename to audio/tests/backends/soundfile/__init__.py
diff --git a/paddleaudio/tests/backends/soundfile/test_io.py b/audio/tests/backends/soundfile/test_io.py
similarity index 100%
rename from paddleaudio/tests/backends/soundfile/test_io.py
rename to audio/tests/backends/soundfile/test_io.py
index 0f7580a40..9d092902d 100644
--- a/paddleaudio/tests/backends/soundfile/test_io.py
+++ b/audio/tests/backends/soundfile/test_io.py
@@ -16,9 +16,9 @@ import os
import unittest
import numpy as np
+import paddleaudio
import soundfile as sf
-import paddleaudio
from ..base import BackendTest
diff --git a/paddleaudio/tests/benchmark/README.md b/audio/tests/benchmark/README.md
similarity index 100%
rename from paddleaudio/tests/benchmark/README.md
rename to audio/tests/benchmark/README.md
diff --git a/paddleaudio/tests/benchmark/log_melspectrogram.py b/audio/tests/benchmark/log_melspectrogram.py
similarity index 99%
rename from paddleaudio/tests/benchmark/log_melspectrogram.py
rename to audio/tests/benchmark/log_melspectrogram.py
index 5230acd42..9832aed4d 100644
--- a/paddleaudio/tests/benchmark/log_melspectrogram.py
+++ b/audio/tests/benchmark/log_melspectrogram.py
@@ -17,11 +17,10 @@ import urllib.request
import librosa
import numpy as np
import paddle
+import paddleaudio
import torch
import torchaudio
-import paddleaudio
-
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
diff --git a/paddleaudio/tests/benchmark/melspectrogram.py b/audio/tests/benchmark/melspectrogram.py
similarity index 99%
rename from paddleaudio/tests/benchmark/melspectrogram.py
rename to audio/tests/benchmark/melspectrogram.py
index e0b79b45a..5fe3f2481 100644
--- a/paddleaudio/tests/benchmark/melspectrogram.py
+++ b/audio/tests/benchmark/melspectrogram.py
@@ -17,11 +17,10 @@ import urllib.request
import librosa
import numpy as np
import paddle
+import paddleaudio
import torch
import torchaudio
-import paddleaudio
-
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
diff --git a/paddleaudio/tests/benchmark/mfcc.py b/audio/tests/benchmark/mfcc.py
similarity index 99%
rename from paddleaudio/tests/benchmark/mfcc.py
rename to audio/tests/benchmark/mfcc.py
index 2572ff33d..c6a8c85f9 100644
--- a/paddleaudio/tests/benchmark/mfcc.py
+++ b/audio/tests/benchmark/mfcc.py
@@ -17,11 +17,10 @@ import urllib.request
import librosa
import numpy as np
import paddle
+import paddleaudio
import torch
import torchaudio
-import paddleaudio
-
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))
diff --git a/paddleaudio/tests/features/__init__.py b/audio/tests/features/__init__.py
similarity index 100%
rename from paddleaudio/tests/features/__init__.py
rename to audio/tests/features/__init__.py
diff --git a/paddleaudio/tests/features/base.py b/audio/tests/features/base.py
similarity index 99%
rename from paddleaudio/tests/features/base.py
rename to audio/tests/features/base.py
index 725e1e2e7..476f6b8ee 100644
--- a/paddleaudio/tests/features/base.py
+++ b/audio/tests/features/base.py
@@ -17,7 +17,6 @@ import urllib.request
import numpy as np
import paddle
-
from paddleaudio import load
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
diff --git a/paddleaudio/tests/features/test_istft.py b/audio/tests/features/test_istft.py
similarity index 100%
rename from paddleaudio/tests/features/test_istft.py
rename to audio/tests/features/test_istft.py
index 23371200b..9cf8cdd65 100644
--- a/paddleaudio/tests/features/test_istft.py
+++ b/audio/tests/features/test_istft.py
@@ -15,9 +15,9 @@ import unittest
import numpy as np
import paddle
+from paddleaudio.functional.window import get_window
from .base import FeatTest
-from paddleaudio.functional.window import get_window
from paddlespeech.s2t.transform.spectrogram import IStft
from paddlespeech.s2t.transform.spectrogram import Stft
diff --git a/paddleaudio/tests/features/test_kaldi.py b/audio/tests/features/test_kaldi.py
similarity index 100%
rename from paddleaudio/tests/features/test_kaldi.py
rename to audio/tests/features/test_kaldi.py
index 6e826aaa7..00a576f6f 100644
--- a/paddleaudio/tests/features/test_kaldi.py
+++ b/audio/tests/features/test_kaldi.py
@@ -15,10 +15,10 @@ import unittest
import numpy as np
import paddle
+import paddleaudio
import torch
import torchaudio
-import paddleaudio
from .base import FeatTest
diff --git a/paddleaudio/tests/features/test_librosa.py b/audio/tests/features/test_librosa.py
similarity index 100%
rename from paddleaudio/tests/features/test_librosa.py
rename to audio/tests/features/test_librosa.py
index cf0c98c72..a1d3e8400 100644
--- a/paddleaudio/tests/features/test_librosa.py
+++ b/audio/tests/features/test_librosa.py
@@ -16,11 +16,11 @@ import unittest
import librosa
import numpy as np
import paddle
-
import paddleaudio
-from .base import FeatTest
from paddleaudio.functional.window import get_window
+from .base import FeatTest
+
class TestLibrosa(FeatTest):
def initParmas(self):
diff --git a/paddleaudio/tests/features/test_log_melspectrogram.py b/audio/tests/features/test_log_melspectrogram.py
similarity index 100%
rename from paddleaudio/tests/features/test_log_melspectrogram.py
rename to audio/tests/features/test_log_melspectrogram.py
index 6bae2df3f..0383c2b8b 100644
--- a/paddleaudio/tests/features/test_log_melspectrogram.py
+++ b/audio/tests/features/test_log_melspectrogram.py
@@ -15,8 +15,8 @@ import unittest
import numpy as np
import paddle
-
import paddleaudio
+
from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram
diff --git a/paddleaudio/tests/features/test_spectrogram.py b/audio/tests/features/test_spectrogram.py
similarity index 100%
rename from paddleaudio/tests/features/test_spectrogram.py
rename to audio/tests/features/test_spectrogram.py
index 50b21403b..1774fe619 100644
--- a/paddleaudio/tests/features/test_spectrogram.py
+++ b/audio/tests/features/test_spectrogram.py
@@ -15,8 +15,8 @@ import unittest
import numpy as np
import paddle
-
import paddleaudio
+
from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import Spectrogram
diff --git a/paddleaudio/tests/features/test_stft.py b/audio/tests/features/test_stft.py
similarity index 100%
rename from paddleaudio/tests/features/test_stft.py
rename to audio/tests/features/test_stft.py
index c64b5ebe6..58792ffe2 100644
--- a/paddleaudio/tests/features/test_stft.py
+++ b/audio/tests/features/test_stft.py
@@ -15,9 +15,9 @@ import unittest
import numpy as np
import paddle
+from paddleaudio.functional.window import get_window
from .base import FeatTest
-from paddleaudio.functional.window import get_window
from paddlespeech.s2t.transform.spectrogram import Stft
diff --git a/demos/README.md b/demos/README.md
index 36e93dbf1..8abd67249 100644
--- a/demos/README.md
+++ b/demos/README.md
@@ -10,6 +10,8 @@ The directory containes many speech applications in multi scenarios.
* metaverse - 2D AR with TTS
* punctuation_restoration - restore punctuation from raw text
* speech recognition - recognize text of an audio file
+* speech server - server for speech tasks, e.g. ASR, TTS, CLS
+* streaming asr server - receive an audio stream via WebSocket and recognize it into a transcript
* speech translation - end to end speech translation
* story talker - book reader based on OCR and TTS
* style_fs2 - multi style control for FastSpeech2 model
diff --git a/demos/README_cn.md b/demos/README_cn.md
index add6e25f5..471342127 100644
--- a/demos/README_cn.md
+++ b/demos/README_cn.md
@@ -10,6 +10,8 @@
* 元宇宙 - 基于语音合成的 2D 增强现实。
* 标点恢复 - 通常作为语音识别的文本后处理任务,为一段无标点的纯文本添加相应的标点符号。
* 语音识别 - 识别一段音频中包含的语音文字。
+* 语音服务 - 离线语音服务,包括 ASR、TTS、CLS 等。
+* 流式语音识别服务 - 以流式方式输入语音数据流,识别音频中的文字。
* 语音翻译 - 实时识别音频中的语言,并同时翻译成目标语言。
* 会说话的故事书 - 基于 OCR 和语音合成的会说话的故事书。
* 个性化语音合成 - 基于 FastSpeech2 模型的个性化语音合成。
diff --git a/demos/audio_searching/src/vpr_search.py b/demos/audio_searching/src/vpr_search.py
index 94974d0d8..2780dfb3b 100644
--- a/demos/audio_searching/src/vpr_search.py
+++ b/demos/audio_searching/src/vpr_search.py
@@ -19,6 +19,7 @@ from fastapi import FastAPI
from fastapi import File
from fastapi import Form
from fastapi import UploadFile
+from logs import LOGGER
from mysql_helpers import MySQLHelper
from operations.count import do_count_vpr
from operations.count import do_get
@@ -31,8 +32,6 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import FileResponse
-from logs import LOGGER
-
app = FastAPI()
app.add_middleware(
CORSMiddleware,
diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md
index 7d7180ae9..b79f3f7a1 100644
--- a/demos/speaker_verification/README.md
+++ b/demos/speaker_verification/README.md
@@ -1,5 +1,5 @@
([简体中文](./README_cn.md)|English)
-# Speech Verification)
+# Speech Verification
## Introduction
diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md
index 636548801..6493e8e61 100644
--- a/demos/speech_recognition/README.md
+++ b/demos/speech_recognition/README.md
@@ -24,13 +24,13 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
- Command Line(Recommended)
```bash
# Chinese
- paddlespeech asr --input ./zh.wav
+ paddlespeech asr --input ./zh.wav -v
# English
- paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+ paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav -v
# Chinese ASR + Punctuation Restoration
- paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+ paddlespeech asr --input ./zh.wav -v | paddlespeech text --task punc -v
```
- (It doesn't matter if package `paddlespeech-ctcdecoders` is not found, this package is optional.)
+  (If you don't want to see the log information, you can remove "-v". Also, it doesn't matter if the package `paddlespeech-ctcdecoders` is not found; this package is optional.)
Usage:
```bash
@@ -45,6 +45,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
- `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
- `yes`: No additional parameters required. Once set this parameter, it means accepting the request of the program by default, which includes transforming the audio sample rate. Default: `False`.
- `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.
+ - `verbose`: Show the log information.
Output:
```bash
@@ -84,8 +85,12 @@ Here is a list of pretrained models released by PaddleSpeech that can be used by
| Model | Language | Sample Rate
| :--- | :---: | :---: |
-| conformer_wenetspeech| zh| 16k
-| transformer_librispeech| en| 16k
+| conformer_wenetspeech | zh | 16k
+| conformer_online_multicn | zh | 16k
+| conformer_aishell | zh | 16k
+| conformer_online_aishell | zh | 16k
+| transformer_librispeech | en | 16k
+| deepspeech2online_wenetspeech | zh | 16k
| deepspeech2offline_aishell| zh| 16k
| deepspeech2online_aishell | zh | 16k
-|deepspeech2offline_librispeech|en| 16k
+| deepspeech2offline_librispeech | en | 16k
diff --git a/demos/speech_recognition/README_cn.md b/demos/speech_recognition/README_cn.md
index 8033dbd81..8d631d89c 100644
--- a/demos/speech_recognition/README_cn.md
+++ b/demos/speech_recognition/README_cn.md
@@ -22,13 +22,13 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
- 命令行 (推荐使用)
```bash
# 中文
- paddlespeech asr --input ./zh.wav
+ paddlespeech asr --input ./zh.wav -v
# 英文
- paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+ paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav -v
# 中文 + 标点恢复
- paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+ paddlespeech asr --input ./zh.wav -v | paddlespeech text --task punc -v
```
- (如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error,没有关系,这个包是非必须的。)
+ (如果不想显示 log 信息,可以不使用"-v", 另外如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error,没有关系,这个包是非必须的。)
使用方法:
```bash
@@ -43,6 +43,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
- `ckpt_path`:模型参数文件,若不设置则下载预训练模型使用,默认值:`None`。
- `yes`;不需要设置额外的参数,一旦设置了该参数,说明你默认同意程序的所有请求,其中包括自动转换输入音频的采样率。默认值:`False`。
- `device`:执行预测的设备,默认值:当前系统下 paddlepaddle 的默认 device。
+ - `verbose`: 如果使用,显示 logger 信息。
输出:
```bash
@@ -82,7 +83,11 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
| 模型 | 语言 | 采样率
| :--- | :---: | :---: |
| conformer_wenetspeech | zh | 16k
+| conformer_online_multicn | zh | 16k
+| conformer_aishell | zh | 16k
+| conformer_online_aishell | zh | 16k
| transformer_librispeech | en | 16k
+| deepspeech2online_wenetspeech | zh | 16k
| deepspeech2offline_aishell| zh| 16k
| deepspeech2online_aishell | zh | 16k
| deepspeech2offline_librispeech | en | 16k
diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md
index 17a01f0bb..4a7c7447e 100644
--- a/demos/speech_server/README_cn.md
+++ b/demos/speech_server/README_cn.md
@@ -86,9 +86,6 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
```
paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
- # 流式ASR
- paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav
-
```
使用帮助:
diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md
new file mode 100644
index 000000000..3de2f3862
--- /dev/null
+++ b/demos/streaming_asr_server/README.md
@@ -0,0 +1,358 @@
+([简体中文](./README_cn.md)|English)
+
+# Speech Server
+
+## Introduction
+This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client`, or with a few lines of Python code.
+
+The streaming ASR server only supports the `websocket` protocol; the `http` protocol is not supported.
+
+## Usage
+### 1. Installation
+See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
+
+It is recommended to use **paddlepaddle 2.2.1** or above.
+You can choose either the medium or hard way to install paddlespeech.
+
+### 2. Prepare config File
+The configuration file can be found in `conf/ws_application.yaml` and `conf/ws_conformer_application.yaml`.
+
+At present, the integrated ASR models include DeepSpeech2 and Conformer.
+
+
+The input of the ASR client demo should be a WAV file (`.wav`), and the sample rate must be the same as the model's.
+
+Here is a sample file for this ASR client demo that can be downloaded:
+```bash
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+```
+
+### 3. Server Usage
+- Command Line (Recommended)
+
+ ```bash
+ # in PaddleSpeech/demos/streaming_asr_server start the service
+ paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml
+ ```
+
+ Usage:
+
+ ```bash
+ paddlespeech_server start --help
+ ```
+ Arguments:
+  - `config_file`: yaml file of the app, default: `./conf/application.yaml`
+ - `log_file`: log file. Default: `./log/paddlespeech.log`
+
+ Output:
+ ```bash
+ [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance
+ [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu
+ [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking...
+ [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine
+ [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success
+ [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully.
+ INFO: Started server process [11173]
+ [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173]
+ INFO: Waiting for application startup.
+ [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup.
+ INFO: Application startup complete.
+ [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete.
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ infos = await tasks.gather(*fs, loop=self)
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ await tasks.sleep(0, loop=self)
+ INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ ```
+
+- Python API
+ ```python
+ # in PaddleSpeech/demos/streaming_asr_server directory
+ from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
+
+ server_executor = ServerExecutor()
+ server_executor(
+ config_file="./conf/ws_conformer_application.yaml",
+ log_file="./log/paddlespeech.log")
+ ```
+
+ Output:
+ ```bash
+ [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance
+ [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu
+ [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking...
+ [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine
+ [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success
+ [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully.
+ INFO: Started server process [11173]
+ [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173]
+ INFO: Waiting for application startup.
+ [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup.
+ INFO: Application startup complete.
+ [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete.
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ infos = await tasks.gather(*fs, loop=self)
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ await tasks.sleep(0, loop=self)
+ INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ ```
+
+
+### 4. ASR Client Usage
+
+**Note:** The response time will be slightly longer when using the client for the first time.
+- Command Line (Recommended)
+ ```
+ paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
+ ```
+
+ Usage:
+
+ ```bash
+ paddlespeech_client asr_online --help
+ ```
+ Arguments:
+ - `server_ip`: server ip. Default: 127.0.0.1
+ - `port`: server port. Default: 8090
+ - `input`(required): Audio file to be recognized.
+  - `sample_rate`: Audio sampling rate, default: 16000.
+ - `lang`: Language. Default: "zh_cn".
+ - `audio_format`: Audio format. Default: "wav".
+ - `punc.server_ip`: punctuation server ip. Default: None.
+ - `punc.server_port`: punctuation server port. Default: None.
+
+ Output:
+ ```bash
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"}
+ [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康
+ [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s.
+
+ ```
+
+- Python API
+ ```python
+ from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor
+
+ asrclient_executor = ASROnlineClientExecutor()
+ res = asrclient_executor(
+ input="./zh.wav",
+ server_ip="127.0.0.1",
+ port=8090,
+ sample_rate=16000,
+ lang="zh_cn",
+ audio_format="wav")
+ print(res)
+ ```
+
+ Output:
+ ```bash
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"}
+ [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ ```
\ No newline at end of file
diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md
new file mode 100644
index 000000000..bb1d37729
--- /dev/null
+++ b/demos/streaming_asr_server/README_cn.md
@@ -0,0 +1,365 @@
+([English](./README.md)|中文)
+
+# 流式语音识别服务
+
+## 介绍
+这个 demo 是一个启动流式语音识别服务以及访问该服务的实现。它可以通过 `paddlespeech_server` 和 `paddlespeech_client` 的单条命令,或 Python 的几行代码来完成。
+
+**流式语音识别服务只支持 `websocket` 协议,不支持 `http` 协议。**
+
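+下面给出一个极简的 websocket 客户端示意(非官方实现),帮助理解“建立连接、分片发送音频、持续接收中间识别结果”的流式交互方式。其中服务路径 `/ws/asr`、直接发送原始 PCM 分片以及结束判断方式都是这里为演示所做的假设,实际消息格式请以本目录下的 `websocket_client.py` 与服务端实现为准,推荐优先使用下文的 `paddlespeech_client asr_online` 命令。
+
+```python
+# 仅为示意的最小 websocket 流式客户端草图(协议细节为假设,非官方实现)
+import asyncio
+import json
+import wave
+
+import websockets  # pip install websockets
+
+
+async def stream_wav(wav_path: str, url: str = "ws://127.0.0.1:8090/ws/asr"):
+    async with websockets.connect(url) as ws:
+        with wave.open(wav_path, "rb") as f:
+            chunk = f.readframes(1600)        # 约 0.1 s @ 16 kHz
+            while chunk:
+                await ws.send(chunk)          # 假设服务端接受原始 PCM 分片
+                chunk = f.readframes(1600)
+        async for message in ws:              # 持续接收服务端推送的 JSON 结果
+            msg = json.loads(message)
+            print(msg)
+            if msg.get("signal") == "finished":
+                break
+
+
+if __name__ == "__main__":
+    asyncio.run(stream_wav("./zh.wav"))
+```
+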
+## 使用方法
+### 1. 安装
+安装 PaddleSpeech 的详细过程请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md)。
+
+推荐使用 **paddlepaddle 2.2.1** 或以上版本。
+你可以从 medium 和 hard 两种安装方式中任选一种安装 PaddleSpeech。
+
+
+### 2. 准备配置文件
+
+流式ASR的服务启动脚本和服务测试脚本存放在 `PaddleSpeech/demos/streaming_asr_server` 目录。
+下载好 `PaddleSpeech` 之后,进入到 `PaddleSpeech/demos/streaming_asr_server` 目录。
+配置文件可参见该目录下 `conf/ws_application.yaml` 和 `conf/ws_conformer_application.yaml` 。
+
+目前服务集成的模型有: DeepSpeech2 和 conformer 模型,对应的配置文件如下:
+* DeepSpeech2: `conf/ws_application.yaml`
+* conformer: `conf/ws_conformer_application.yaml`
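+
+如果想在启动服务前确认配置文件里实际生效的协议与模型,可以用下面的小脚本读取配置并打印关键字段(仅作演示,假设已安装 `pyyaml`,且在 `PaddleSpeech/demos/streaming_asr_server` 目录下执行):
+
+```python
+# 读取流式 ASR 服务配置并打印关键字段(演示用)
+import yaml
+
+with open("./conf/ws_conformer_application.yaml") as f:
+    conf = yaml.safe_load(f)
+
+print("protocol   :", conf["protocol"])                   # 预期为 websocket
+print("engine_list:", conf["engine_list"])                # 预期为 ['asr_online']
+print("model_type :", conf["asr_online"]["model_type"])   # 例如 conformer_online_multicn
+print("sample_rate:", conf["asr_online"]["sample_rate"])  # 例如 16000
+```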
+
+
+
+这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。
+
+可以下载此 ASR client的示例音频:
+```bash
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+```
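+
+下载完成后,可以用下面的小脚本确认音频的声道数与采样率是否满足服务要求(仅作演示,使用 Python 标准库 `wave`):
+
+```python
+# 检查 wav 文件采样率是否为 16000 Hz(与流式 ASR 模型一致)
+import wave
+
+with wave.open("./zh.wav", "rb") as f:
+    rate = f.getframerate()
+    print("channels   :", f.getnchannels())
+    print("sample rate:", rate)
+    print("duration(s):", round(f.getnframes() / rate, 2))
+
+assert rate == 16000, "采样率需与模型采样率一致(16 kHz)"
+```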
+
+### 3. 服务端使用方法
+- 命令行 (推荐使用)
+
+ ```bash
+ # 在 PaddleSpeech/demos/streaming_asr_server 目录启动服务
+ paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml
+ ```
+
+ 使用方法:
+
+ ```bash
+ paddlespeech_server start --help
+ ```
+ 参数:
+ - `config_file`: 服务的配置文件,默认: `./conf/application.yaml`
+ - `log_file`: log 文件,默认:`./log/paddlespeech.log`
+
+ 输出:
+ ```bash
+ [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance
+ [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu
+ [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking...
+ [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine
+ [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success
+ [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully.
+ INFO: Started server process [11173]
+ [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173]
+ INFO: Waiting for application startup.
+ [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup.
+ INFO: Application startup complete.
+ [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete.
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ infos = await tasks.gather(*fs, loop=self)
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ await tasks.sleep(0, loop=self)
+ INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ ```
+
+- Python API
+ ```python
+ # 在 PaddleSpeech/demos/streaming_asr_server 目录
+ from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
+
+ server_executor = ServerExecutor()
+ server_executor(
+ config_file="./conf/ws_conformer_application.yaml",
+ log_file="./log/paddlespeech.log")
+ ```
+
+ 输出:
+ ```bash
+ [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance
+ [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu
+ [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking...
+ [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams
+ [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine
+ [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ set kaiming_uniform
+ [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success
+ [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully.
+ INFO: Started server process [11173]
+ [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173]
+ INFO: Waiting for application startup.
+ [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup.
+ INFO: Application startup complete.
+ [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete.
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ infos = await tasks.gather(*fs, loop=self)
+ /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.
+ await tasks.sleep(0, loop=self)
+ INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
+ ```
+
+### 4. ASR 客户端使用方法
+
+**注意:** 初次使用客户端时响应时间会略长。
+- 命令行 (推荐使用)
+ ```bash
+ paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
+ ```
+
+ 使用帮助:
+
+ ```bash
+ paddlespeech_client asr_online --help
+ ```
+
+ 参数:
+ - `server_ip`: 服务端ip地址,默认: 127.0.0.1。
+ - `port`: 服务端口,默认: 8090。
+ - `input`(必须输入): 用于识别的音频文件。
+ - `sample_rate`: 音频采样率,默认值:16000。
+ - `lang`: 模型语言,默认值:zh_cn。
+ - `audio_format`: 音频格式,默认值:wav。
+ - `punc.server_ip`: 标点预测服务的 ip,默认是 None。
+ - `punc.server_port`: 标点预测服务的端口,默认是 None。
+
+ 输出:
+
+ ```bash
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"}
+ [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康
+ [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s.
+ ```
+
+- Python API
+ ```python
+ from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor
+
+ asrclient_executor = ASROnlineClientExecutor()
+ res = asrclient_executor(
+ input="./zh.wav",
+ server_ip="127.0.0.1",
+ port=8090,
+ sample_rate=16000,
+ lang="zh_cn",
+ audio_format="wav")
+ print(res)
+ ```
+
+ 输出:
+ ```bash
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"}
+ [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''}
+ [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'}
+ [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'}
+ [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'}
+ [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'}
+ [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'}
+ [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
+ ```
diff --git a/demos/streaming_asr_server/conf/application.yaml b/demos/streaming_asr_server/conf/application.yaml
new file mode 100644
index 000000000..50c7a7277
--- /dev/null
+++ b/demos/streaming_asr_server/conf/application.yaml
@@ -0,0 +1,45 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+# SERVER SETTING #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+# ENGINE CONFIG #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+ model_type: 'conformer_online_multicn'
+ am_model: # the pdmodel file of am static model [optional]
+ am_params: # the pdiparams file of am static model [optional]
+ lang: 'zh'
+ sample_rate: 16000
+ cfg_path:
+ decode_method:
+ force_yes: True
+ device: # cpu or gpu:id
+ am_predictor_conf:
+ device: # set 'gpu:id' or 'cpu'
+ switch_ir_optim: True
+ glog_info: False # True -> print glog
+ summary: True # False -> do not show predictor config
+
+ chunk_buffer_conf:
+ window_n: 7 # frame
+ shift_n: 4 # frame
+ window_ms: 25 # ms
+ shift_ms: 10 # ms
+ sample_rate: 16000
+ sample_width: 2
\ No newline at end of file
diff --git a/demos/streaming_asr_server/conf/ws_application.yaml b/demos/streaming_asr_server/conf/ws_application.yaml
new file mode 100644
index 000000000..fc02f2ca4
--- /dev/null
+++ b/demos/streaming_asr_server/conf/ws_application.yaml
@@ -0,0 +1,47 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+# SERVER SETTING #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+# ENGINE CONFIG #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+ model_type: 'deepspeech2online_aishell'
+ am_model: # the pdmodel file of am static model [optional]
+ am_params: # the pdiparams file of am static model [optional]
+ lang: 'zh'
+ sample_rate: 16000
+ cfg_path:
+ decode_method:
+ force_yes: True
+
+ am_predictor_conf:
+ device: # set 'gpu:id' or 'cpu'
+ switch_ir_optim: True
+ glog_info: False # True -> print glog
+ summary: True # False -> do not show predictor config
+
+ chunk_buffer_conf:
+ frame_duration_ms: 80
+ shift_ms: 40
+ sample_rate: 16000
+ sample_width: 2
+ window_n: 7 # frame
+ shift_n: 4 # frame
+ window_ms: 20 # ms
+ shift_ms: 10 # ms
diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
new file mode 100644
index 000000000..50c7a7277
--- /dev/null
+++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
@@ -0,0 +1,45 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+# SERVER SETTING #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+# ENGINE CONFIG #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+ model_type: 'conformer_online_multicn'
+ am_model: # the pdmodel file of am static model [optional]
+ am_params: # the pdiparams file of am static model [optional]
+ lang: 'zh'
+ sample_rate: 16000
+ cfg_path:
+ decode_method:
+ force_yes: True
+ device: # cpu or gpu:id
+ am_predictor_conf:
+ device: # set 'gpu:id' or 'cpu'
+ switch_ir_optim: True
+ glog_info: False # True -> print glog
+ summary: True # False -> do not show predictor config
+
+ chunk_buffer_conf:
+ window_n: 7 # frame
+ shift_n: 4 # frame
+ window_ms: 25 # ms
+ shift_ms: 10 # ms
+ sample_rate: 16000
+ sample_width: 2
\ No newline at end of file
diff --git a/demos/streaming_asr_server/run.sh b/demos/streaming_asr_server/run.sh
new file mode 100644
index 000000000..d2ca34475
--- /dev/null
+++ b/demos/streaming_asr_server/run.sh
@@ -0,0 +1,2 @@
+# start the streaming asr service
+paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml
\ No newline at end of file
diff --git a/demos/streaming_asr_server/test.sh b/demos/streaming_asr_server/test.sh
new file mode 100644
index 000000000..fe8155cf3
--- /dev/null
+++ b/demos/streaming_asr_server/test.sh
@@ -0,0 +1,5 @@
+# download the test wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+
+# read the wav and pass it to service
+python3 websocket_client.py --wavfile ./zh.wav
diff --git a/paddlespeech/server/tests/asr/online/web/app.py b/demos/streaming_asr_server/web/app.py
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/app.py
rename to demos/streaming_asr_server/web/app.py
diff --git a/paddlespeech/server/tests/asr/online/web/paddle_web_demo.png b/demos/streaming_asr_server/web/paddle_web_demo.png
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/paddle_web_demo.png
rename to demos/streaming_asr_server/web/paddle_web_demo.png
diff --git a/paddlespeech/server/tests/asr/online/web/readme.md b/demos/streaming_asr_server/web/readme.md
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/readme.md
rename to demos/streaming_asr_server/web/readme.md
diff --git a/paddlespeech/server/tests/asr/online/web/static/css/font-awesome.min.css b/demos/streaming_asr_server/web/static/css/font-awesome.min.css
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/css/font-awesome.min.css
rename to demos/streaming_asr_server/web/static/css/font-awesome.min.css
diff --git a/paddlespeech/server/tests/asr/online/web/static/css/style.css b/demos/streaming_asr_server/web/static/css/style.css
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/css/style.css
rename to demos/streaming_asr_server/web/static/css/style.css
diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/FontAwesome.otf b/demos/streaming_asr_server/web/static/fonts/FontAwesome.otf
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/fonts/FontAwesome.otf
rename to demos/streaming_asr_server/web/static/fonts/FontAwesome.otf
diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.eot b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.eot
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.eot
rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.eot
diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.svg b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.svg
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.svg
rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.svg
diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.ttf b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.ttf
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.ttf
rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.ttf
diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff
rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff
diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff2 b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff2
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff2
rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff2
diff --git a/paddlespeech/server/tests/asr/online/web/static/image/PaddleSpeech_logo.png b/demos/streaming_asr_server/web/static/image/PaddleSpeech_logo.png
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/image/PaddleSpeech_logo.png
rename to demos/streaming_asr_server/web/static/image/PaddleSpeech_logo.png
diff --git a/paddlespeech/server/tests/asr/online/web/static/image/voice-dictation.svg b/demos/streaming_asr_server/web/static/image/voice-dictation.svg
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/image/voice-dictation.svg
rename to demos/streaming_asr_server/web/static/image/voice-dictation.svg
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/SoundRecognizer.js b/demos/streaming_asr_server/web/static/js/SoundRecognizer.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/SoundRecognizer.js
rename to demos/streaming_asr_server/web/static/js/SoundRecognizer.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/jquery-3.2.1.min.js b/demos/streaming_asr_server/web/static/js/jquery-3.2.1.min.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/jquery-3.2.1.min.js
rename to demos/streaming_asr_server/web/static/js/jquery-3.2.1.min.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/mp3.js b/demos/streaming_asr_server/web/static/js/recorder/engine/mp3.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/mp3.js
rename to demos/streaming_asr_server/web/static/js/recorder/engine/mp3.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/pcm.js b/demos/streaming_asr_server/web/static/js/recorder/engine/pcm.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/pcm.js
rename to demos/streaming_asr_server/web/static/js/recorder/engine/pcm.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/wav.js b/demos/streaming_asr_server/web/static/js/recorder/engine/wav.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/wav.js
rename to demos/streaming_asr_server/web/static/js/recorder/engine/wav.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/frequency.histogram.view.js b/demos/streaming_asr_server/web/static/js/recorder/extensions/frequency.histogram.view.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/frequency.histogram.view.js
rename to demos/streaming_asr_server/web/static/js/recorder/extensions/frequency.histogram.view.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/lib.fft.js b/demos/streaming_asr_server/web/static/js/recorder/extensions/lib.fft.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/lib.fft.js
rename to demos/streaming_asr_server/web/static/js/recorder/extensions/lib.fft.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/recorder-core.js b/demos/streaming_asr_server/web/static/js/recorder/recorder-core.js
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/recorder-core.js
rename to demos/streaming_asr_server/web/static/js/recorder/recorder-core.js
diff --git a/paddlespeech/server/tests/asr/online/web/static/paddle.ico b/demos/streaming_asr_server/web/static/paddle.ico
similarity index 100%
rename from paddlespeech/server/tests/asr/online/web/static/paddle.ico
rename to demos/streaming_asr_server/web/static/paddle.ico
diff --git a/paddlespeech/server/tests/asr/online/web/templates/index.html b/demos/streaming_asr_server/web/templates/index.html
similarity index 99%
rename from paddlespeech/server/tests/asr/online/web/templates/index.html
rename to demos/streaming_asr_server/web/templates/index.html
index 7aa227fb1..56c630808 100644
--- a/paddlespeech/server/tests/asr/online/web/templates/index.html
+++ b/demos/streaming_asr_server/web/templates/index.html
@@ -93,7 +93,7 @@
function parseResult(data) {
var data = JSON.parse(data)
- var result = data.asr_results
+ var result = data.result
console.log(result)
$("#resultPanel").html(result)
}
@@ -152,4 +152,4 @@