From 1a0c2bea5dae1c0aa865b50b82f1145ad364de66 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Thu, 21 Apr 2022 15:31:31 +0800 Subject: [PATCH 1/3] add streaming asr demo, test=doc --- demos/streaming_asr_server/README.md | 246 ++++++++++++ demos/streaming_asr_server/README_cn.md | 356 ++++++++++++++++++ .../conf/ws_application.yaml | 47 +++ .../conf/ws_conformer_application.yaml | 45 +++ demos/streaming_asr_server/run.sh | 2 + demos/streaming_asr_server/test.sh | 5 + .../streaming_asr_server}/web/app.py | 0 .../web/paddle_web_demo.png | Bin .../streaming_asr_server}/web/readme.md | 0 .../web/static/css/font-awesome.min.css | 0 .../web/static/css/style.css | 0 .../web/static/fonts/FontAwesome.otf | Bin .../web/static/fonts/fontawesome-webfont.eot | Bin .../web/static/fonts/fontawesome-webfont.svg | 0 .../web/static/fonts/fontawesome-webfont.ttf | Bin .../web/static/fonts/fontawesome-webfont.woff | Bin .../static/fonts/fontawesome-webfont.woff2 | Bin .../web/static/image/PaddleSpeech_logo.png | Bin .../web/static/image/voice-dictation.svg | 0 .../web/static/js/SoundRecognizer.js | 0 .../web/static/js/jquery-3.2.1.min.js | 0 .../web/static/js/recorder/engine/mp3.js | 0 .../web/static/js/recorder/engine/pcm.js | 0 .../web/static/js/recorder/engine/wav.js | 0 .../extensions/frequency.histogram.view.js | 0 .../static/js/recorder/extensions/lib.fft.js | 0 .../web/static/js/recorder/recorder-core.js | 0 .../web/static/paddle.ico | Bin .../web/templates/index.html | 0 .../streaming_asr_server/websocket_client.py | 62 +++ .../server/bin/paddlespeech_client.py | 14 +- .../server/conf/ws_conformer_application.yaml | 2 +- .../server/engine/asr/online/asr_engine.py | 11 + .../server/tests/asr/online/README_cn.md | 49 --- .../server/tests/asr/online/__init__.py | 13 - .../tests/asr/online/microphone_client.py | 161 -------- .../tests/asr/online/websocket_client.py | 139 ------- 37 files changed, 784 insertions(+), 368 deletions(-) create mode 100644 
demos/streaming_asr_server/README.md create mode 100644 demos/streaming_asr_server/README_cn.md create mode 100644 demos/streaming_asr_server/conf/ws_application.yaml create mode 100644 demos/streaming_asr_server/conf/ws_conformer_application.yaml create mode 100644 demos/streaming_asr_server/run.sh create mode 100644 demos/streaming_asr_server/test.sh rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/app.py (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/paddle_web_demo.png (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/readme.md (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/css/font-awesome.min.css (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/css/style.css (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/fonts/FontAwesome.otf (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/fonts/fontawesome-webfont.eot (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/fonts/fontawesome-webfont.svg (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/fonts/fontawesome-webfont.ttf (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/fonts/fontawesome-webfont.woff (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/fonts/fontawesome-webfont.woff2 (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/image/PaddleSpeech_logo.png (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/image/voice-dictation.svg (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/SoundRecognizer.js (100%) rename 
{paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/jquery-3.2.1.min.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/recorder/engine/mp3.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/recorder/engine/pcm.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/recorder/engine/wav.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/recorder/extensions/frequency.histogram.view.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/recorder/extensions/lib.fft.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/js/recorder/recorder-core.js (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/static/paddle.ico (100%) rename {paddlespeech/server/tests/asr/online => demos/streaming_asr_server}/web/templates/index.html (100%) create mode 100644 demos/streaming_asr_server/websocket_client.py delete mode 100644 paddlespeech/server/tests/asr/online/README_cn.md delete mode 100644 paddlespeech/server/tests/asr/online/__init__.py delete mode 100644 paddlespeech/server/tests/asr/online/microphone_client.py delete mode 100644 paddlespeech/server/tests/asr/online/websocket_client.py diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md new file mode 100644 index 00000000..0323d398 --- /dev/null +++ b/demos/streaming_asr_server/README.md @@ -0,0 +1,246 @@ +([简体中文](./README_cn.md)|English) + +# Speech Server + +## Introduction +This demo is an implementation of starting the voice service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. + + +## Usage +### 1. 
Installation +see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +It is recommended to use **paddlepaddle 2.2.1** or above. +You can choose one way from medium and hard to install paddlespeech. + +### 2. Prepare config File +The configuration file can be found in `conf/application.yaml` . +Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`. +At present, the speech tasks integrated by the service include: asr (speech recognition), tts (text to speech) and cls (audio classification). +Currently the engine type supports two forms: python and inference (Paddle Inference) + + +The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. + +Here are sample files for this ASR client demo that can be downloaded: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +``` + +### 3. Server Usage +- Command Line (Recommended) + + ```bash + # start the service + paddlespeech_server start --config_file ./conf/application.yaml + ``` + + Usage: + + ```bash + paddlespeech_server start --help + ``` + Arguments: + - `config_file`: yaml file of the app, default: ./conf/application.yaml + - `log_file`: log file. Default: ./log/paddlespeech.log + + Output: + ```bash + [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384] + INFO: Waiting for application startup. + [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup. + INFO: Application startup complete. + [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete. 
+ INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_server import ServerExecutor + + server_executor = ServerExecutor() + server_executor( + config_file="./conf/application.yaml", + log_file="./log/paddlespeech.log") + ``` + + Output: + ```bash + INFO: Started server process [529] + [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529] + INFO: Waiting for application startup. + [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup. + INFO: Application startup complete. + [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + + ``` + + +### 4. ASR Client Usage +**Note:** The response time will be slightly longer when using the client for the first time +- Command Line (Recommended) + ``` + paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + ``` + + Usage: + + ```bash + paddlespeech_client asr --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Audio file to be recognized. + - `sample_rate`: Audio sampling rate, default: 16000. + - `lang`: Language. Default: "zh_cn". + - `audio_format`: Audio format. Default: "wav". + + Output: + ```bash + [2022-02-23 18:11:22,819] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} + [2022-02-23 18:11:22,820] [ INFO] - time cost 0.689145 s. 
+ + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor + import json + + asrclient_executor = ASRClientExecutor() + res = asrclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + sample_rate=16000, + lang="zh_cn", + audio_format="wav") + print(res.json()) + ``` + + Output: + ```bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} + ``` + +### 5. TTS Client Usage +**Note:** The response time will be slightly longer when using the client for the first time +- Command Line (Recommended) + ```bash + paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav + ``` + Usage: + + ```bash + paddlespeech_client tts --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Input text to generate. + - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0 + - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0 + - `volume`: Audio volume, the value should be set between 0 and 3. Default: 1.0 + - `sample_rate`: Sampling rate, choice: [0, 8000, 16000], the default is the same as the model. Default: 0 + - `output`: Output wave filepath. Default: None, which means not to save the audio to the local. + + Output: + ```bash + [2022-02-23 15:20:37,875] [ INFO] - {'description': 'success.'} + [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav. + [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s. + [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s. 
+ + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor + import json + + ttsclient_executor = TTSClientExecutor() + res = ttsclient_executor( + input="您好,欢迎使用百度飞桨语音合成服务。", + server_ip="127.0.0.1", + port=8090, + spk_id=0, + speed=1.0, + volume=1.0, + sample_rate=0, + output="./output.wav") + + response_dict = res.json() + print(response_dict["message"]) + print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path'])) + print("Audio duration: %f s." %(response_dict['result']['duration'])) + ``` + + Output: + ```bash + {'description': 'success.'} + Save synthesized audio successfully on ./output.wav. + Audio duration: 3.612500 s. + + ``` + +### 6. CLS Client Usage +**Note:** The response time will be slightly longer when using the client for the first time +- Command Line (Recommended) + ``` + paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + ``` + + Usage: + + ```bash + paddlespeech_client cls --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Audio file to be classified. + - `topk`: topk scores of classification result. + + Output: + ```bash + [2022-03-09 20:44:39,974] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} + [2022-03-09 20:44:39,975] [ INFO] - Response time 0.104360 s. 
+ + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor + import json + + clsclient_executor = CLSClientExecutor() + res = clsclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + topk=1) + print(res.json()) + ``` + + Output: + ```bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} + + ``` + + +## Models supported by the service +### ASR model +Get all models supported by the ASR service via `paddlespeech_server stats --task asr`, where static models can be used for Paddle Inference. + +### TTS model +Get all models supported by the TTS service via `paddlespeech_server stats --task tts`, where static models can be used for Paddle Inference. + +### CLS model +Get all models supported by the CLS service via `paddlespeech_server stats --task cls`, where static models can be used for Paddle Inference. diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md new file mode 100644 index 00000000..c58e17e9 --- /dev/null +++ b/demos/streaming_asr_server/README_cn.md @@ -0,0 +1,356 @@ +([English](./README.md)|中文) + +# 语音服务 + +## 介绍 +这个demo是一个启动流式语音服务和访问服务的实现。 它可以通过使用`paddlespeech_server` 和 `paddlespeech_client`的单个命令或 python 的几行代码来实现。 + + +## 使用方法 +### 1. 安装 +请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +推荐使用 **paddlepaddle 2.2.1** 或以上版本。 +你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。 + + +### 2. 准备配置文件 +配置文件可参见 `conf/ws_application.yaml` 和 `conf/ws_conformer_application.yaml` 。 +目前服务集成的模型有: DeepSpeech2和conformer模型。 + + +这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 + +可以下载此 ASR client的示例音频: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. 
服务端使用方法 +- 命令行 (推荐使用) + + ```bash + # 启动服务 + paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml + ``` + + 使用方法: + + ```bash + paddlespeech_server start --help + ``` + 参数: + - `config_file`: 服务的配置文件,默认: ./conf/ws_conformer_application.yaml + - `log_file`: log 文件. 默认:./log/paddlespeech.log + + 输出: + ```bash + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. 
+ await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_server import ServerExecutor + + server_executor = ServerExecutor() + server_executor( + config_file="./conf/ws_conformer_application.yaml", + log_file="./log/paddlespeech.log") + ``` + + 输出: + ```bash + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + ``` + +### 4. 
ASR 客户端使用方法 +**注意:** 初次使用客户端时响应时间会略长 +- 命令行 (推荐使用) + ``` + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + + ``` + + 使用帮助: + + ```bash + paddlespeech_client asr_online --help + ``` + + 参数: + - `server_ip`: 服务端ip地址,默认: 127.0.0.1。 + - `port`: 服务端口,默认: 8090。 + - `input`(必须输入): 用于识别的音频文件。 + - `sample_rate`: 音频采样率,默认值:16000。 + - `lang`: 模型语言,默认值:zh_cn。 + - `audio_format`: 音频格式,默认值:wav。 + + 输出: + + ```bash + [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive 
msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive 
msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s. 
+ ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor + import json + + asrclient_executor = ASROnlineClientExecutor() + res = asrclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + sample_rate=16000, + lang="zh_cn", + audio_format="wav") + print(res.json()) + ``` + + 输出: + ```bash + [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - 
receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] 
[ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + ``` \ No newline at end of file diff --git a/demos/streaming_asr_server/conf/ws_application.yaml b/demos/streaming_asr_server/conf/ws_application.yaml new file mode 100644 index 00000000..dee8d78b --- /dev/null +++ b/demos/streaming_asr_server/conf/ws_application.yaml @@ -0,0 +1,47 @@ +# This is the parameter configuration file for PaddleSpeech Serving. 
+ +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engin_list is: _ +# task choices = ['asr_online', 'tts_online'] +# protocol = ['websocket', 'http'] (only one can be selected). +# websocket only support online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'deepspeech2online_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + frame_duration_ms: 80 + shift_ms: 40 + sample_rate: 16000 + sample_width: 2 + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 20 # ms + shift_ms: 10 # ms diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml new file mode 100644 index 00000000..2a76ee39 --- /dev/null +++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml @@ -0,0 +1,45 @@ +# This is the parameter configuration file for PaddleSpeech Serving. 
+ +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engin_list is: _ +# task choices = ['asr_online', 'tts_online'] +# protocol = ['websocket', 'http'] (only one can be selected). +# websocket only support online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_multicn' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: 'cpu' # cpu or gpu:id + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 \ No newline at end of file diff --git a/demos/streaming_asr_server/run.sh b/demos/streaming_asr_server/run.sh new file mode 100644 index 00000000..d2ca3447 --- /dev/null +++ b/demos/streaming_asr_server/run.sh @@ -0,0 +1,2 @@ +# start the streaming asr service +paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml \ No newline at end of file diff --git a/demos/streaming_asr_server/test.sh b/demos/streaming_asr_server/test.sh new file mode 100644 index 00000000..fe8155cf --- /dev/null +++ b/demos/streaming_asr_server/test.sh @@ 
-0,0 +1,5 @@ +# download the test wav +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav + +# read the wav and pass it to service +python3 websocket_client.py --wavfile ./zh.wav diff --git a/paddlespeech/server/tests/asr/online/web/app.py b/demos/streaming_asr_server/web/app.py similarity index 100% rename from paddlespeech/server/tests/asr/online/web/app.py rename to demos/streaming_asr_server/web/app.py diff --git a/paddlespeech/server/tests/asr/online/web/paddle_web_demo.png b/demos/streaming_asr_server/web/paddle_web_demo.png similarity index 100% rename from paddlespeech/server/tests/asr/online/web/paddle_web_demo.png rename to demos/streaming_asr_server/web/paddle_web_demo.png diff --git a/paddlespeech/server/tests/asr/online/web/readme.md b/demos/streaming_asr_server/web/readme.md similarity index 100% rename from paddlespeech/server/tests/asr/online/web/readme.md rename to demos/streaming_asr_server/web/readme.md diff --git a/paddlespeech/server/tests/asr/online/web/static/css/font-awesome.min.css b/demos/streaming_asr_server/web/static/css/font-awesome.min.css similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/css/font-awesome.min.css rename to demos/streaming_asr_server/web/static/css/font-awesome.min.css diff --git a/paddlespeech/server/tests/asr/online/web/static/css/style.css b/demos/streaming_asr_server/web/static/css/style.css similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/css/style.css rename to demos/streaming_asr_server/web/static/css/style.css diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/FontAwesome.otf b/demos/streaming_asr_server/web/static/fonts/FontAwesome.otf similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/FontAwesome.otf rename to demos/streaming_asr_server/web/static/fonts/FontAwesome.otf diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.eot 
b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.eot similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.eot rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.eot diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.svg b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.svg similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.svg rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.svg diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.ttf b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.ttf similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.ttf rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.ttf diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff2 b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff2 similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff2 rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff2 diff --git a/paddlespeech/server/tests/asr/online/web/static/image/PaddleSpeech_logo.png b/demos/streaming_asr_server/web/static/image/PaddleSpeech_logo.png similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/image/PaddleSpeech_logo.png rename to 
demos/streaming_asr_server/web/static/image/PaddleSpeech_logo.png diff --git a/paddlespeech/server/tests/asr/online/web/static/image/voice-dictation.svg b/demos/streaming_asr_server/web/static/image/voice-dictation.svg similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/image/voice-dictation.svg rename to demos/streaming_asr_server/web/static/image/voice-dictation.svg diff --git a/paddlespeech/server/tests/asr/online/web/static/js/SoundRecognizer.js b/demos/streaming_asr_server/web/static/js/SoundRecognizer.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/SoundRecognizer.js rename to demos/streaming_asr_server/web/static/js/SoundRecognizer.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/jquery-3.2.1.min.js b/demos/streaming_asr_server/web/static/js/jquery-3.2.1.min.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/jquery-3.2.1.min.js rename to demos/streaming_asr_server/web/static/js/jquery-3.2.1.min.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/mp3.js b/demos/streaming_asr_server/web/static/js/recorder/engine/mp3.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/mp3.js rename to demos/streaming_asr_server/web/static/js/recorder/engine/mp3.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/pcm.js b/demos/streaming_asr_server/web/static/js/recorder/engine/pcm.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/pcm.js rename to demos/streaming_asr_server/web/static/js/recorder/engine/pcm.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/wav.js b/demos/streaming_asr_server/web/static/js/recorder/engine/wav.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/wav.js rename to 
demos/streaming_asr_server/web/static/js/recorder/engine/wav.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/frequency.histogram.view.js b/demos/streaming_asr_server/web/static/js/recorder/extensions/frequency.histogram.view.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/frequency.histogram.view.js rename to demos/streaming_asr_server/web/static/js/recorder/extensions/frequency.histogram.view.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/lib.fft.js b/demos/streaming_asr_server/web/static/js/recorder/extensions/lib.fft.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/lib.fft.js rename to demos/streaming_asr_server/web/static/js/recorder/extensions/lib.fft.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/recorder-core.js b/demos/streaming_asr_server/web/static/js/recorder/recorder-core.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/recorder-core.js rename to demos/streaming_asr_server/web/static/js/recorder/recorder-core.js diff --git a/paddlespeech/server/tests/asr/online/web/static/paddle.ico b/demos/streaming_asr_server/web/static/paddle.ico similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/paddle.ico rename to demos/streaming_asr_server/web/static/paddle.ico diff --git a/paddlespeech/server/tests/asr/online/web/templates/index.html b/demos/streaming_asr_server/web/templates/index.html similarity index 100% rename from paddlespeech/server/tests/asr/online/web/templates/index.html rename to demos/streaming_asr_server/web/templates/index.html diff --git a/demos/streaming_asr_server/websocket_client.py b/demos/streaming_asr_server/websocket_client.py new file mode 100644 index 00000000..2a15096c --- /dev/null +++ b/demos/streaming_asr_server/websocket_client.py @@ 
-0,0 +1,62 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#!/usr/bin/python +# -*- coding: UTF-8 -*- +import argparse +import asyncio +import codecs +import logging +import os + +from paddlespeech.cli.log import logger +from paddlespeech.server.utils.audio_handler import ASRAudioHandler + + +def main(args): + logger.info("asr websocket client start") + handler = ASRAudioHandler("127.0.0.1", 8090) + loop = asyncio.get_event_loop() + + # support to process single audio file + if args.wavfile and os.path.exists(args.wavfile): + logger.info(f"start to process the wavscp: {args.wavfile}") + result = loop.run_until_complete(handler.run(args.wavfile)) + result = result["asr_results"] + logger.info(f"asr websocket client finished : {result}") + + # support to process batch audios from wav.scp + if args.wavscp and os.path.exists(args.wavscp): + logging.info(f"start to process the wavscp: {args.wavscp}") + with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\ + codecs.open("result.txt", 'w', encoding='utf-8') as w: + for line in f: + utt_name, utt_path = line.strip().split() + result = loop.run_until_complete(handler.run(utt_path)) + result = result["asr_results"] + w.write(f"{utt_name} {result}\n") + + +if __name__ == "__main__": + logger.info("Start to do streaming asr client") + parser = argparse.ArgumentParser() + parser.add_argument( + "--wavfile", + action="store", + help="wav file path ", + 
default="./16_audio.wav") + parser.add_argument( + "--wavscp", type=str, default=None, help="The batch audios dict text") + args = parser.parse_args() + + main(args) diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 45469178..522c7863 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -30,11 +30,14 @@ from ..executor import BaseExecutor from ..util import cli_client_register from ..util import stats_wrapper from paddlespeech.cli.log import logger -from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler +from paddlespeech.server.utils.audio_handler import ASRAudioHandler from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 -__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor'] +__all__ = [ + 'TTSClientExecutor', 'ASRClientExecutor', 'ASRClientExecutor', + 'CLSClientExecutor' +] @cli_client_register( @@ -236,11 +239,11 @@ class ASRClientExecutor(BaseExecutor): @cli_client_register( name='paddlespeech_client.asr_online', description='visit asr online service') -class ASRClientExecutor(BaseExecutor): +class ASROnlineClientExecutor(BaseExecutor): def __init__(self): - super(ASRClientExecutor, self).__init__() + super(ASROnlineClientExecutor, self).__init__() self.parser = argparse.ArgumentParser( - prog='paddlespeech_client.asr', add_help=True) + prog='paddlespeech_client.asr_online', add_help=True) self.parser.add_argument( '--server_ip', type=str, default='127.0.0.1', help='server ip') self.parser.add_argument( @@ -305,6 +308,7 @@ class ASRClientExecutor(BaseExecutor): return res['asr_results'] + @cli_client_register( name='paddlespeech_client.cls', description='visit cls service') class CLSClientExecutor(BaseExecutor): diff --git a/paddlespeech/server/conf/ws_conformer_application.yaml 
b/paddlespeech/server/conf/ws_conformer_application.yaml index e14833de..2a76ee39 100644 --- a/paddlespeech/server/conf/ws_conformer_application.yaml +++ b/paddlespeech/server/conf/ws_conformer_application.yaml @@ -29,7 +29,7 @@ asr_online: cfg_path: decode_method: force_yes: True - + device: 'cpu' # cpu or gpu:id am_predictor_conf: device: # set 'gpu:id' or 'cpu' switch_ir_optim: True diff --git a/paddlespeech/server/engine/asr/online/asr_engine.py b/paddlespeech/server/engine/asr/online/asr_engine.py index 758cbaab..10e72024 100644 --- a/paddlespeech/server/engine/asr/online/asr_engine.py +++ b/paddlespeech/server/engine/asr/online/asr_engine.py @@ -1028,6 +1028,17 @@ class ASREngine(BaseEngine): self.output = "" self.executor = ASRServerExecutor() self.config = config + try: + if self.config.get("device", None): + self.device = self.config.device + else: + self.device = paddle.get_device() + logger.info(f"paddlespeech_server set the device: {self.device}") + paddle.set_device(self.device) + except BaseException: + logger.error( + "Set device failed, please check if device is already used and the parameter 'device' in the yaml file" + ) self.executor._init_from_path( model_type=self.config.model_type, diff --git a/paddlespeech/server/tests/asr/online/README_cn.md b/paddlespeech/server/tests/asr/online/README_cn.md deleted file mode 100644 index 3e7d7a15..00000000 --- a/paddlespeech/server/tests/asr/online/README_cn.md +++ /dev/null @@ -1,49 +0,0 @@ -([简体中文](./README_cn.md)|English) - -# 语音服务 - -## 介绍 -本文档介绍如何使用流式ASR的三种不同客户端:网页、麦克风、Python模拟流式服务。 - - -## 使用方法 -### 1. 安装 -请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). - -推荐使用 **paddlepaddle 2.2.1** 或以上版本。 -你可以从 medium,hard 三中方式中选择一种方式安装 PaddleSpeech。 - - -### 2. 
准备测试文件 - -这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 - -可以下载此 ASR client的示例音频: -```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav -``` - -### 2. 流式 ASR 客户端使用方法 - -- Python模拟流式服务命令行 - ``` - - # 流式ASR - paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav - - ``` - - -- 麦克风 - ``` - # 直接调用麦克风设备 - python microphone_client.py - - ``` - - -- 网页 - ``` - # 进入web目录后参考相关readme.md - - ``` diff --git a/paddlespeech/server/tests/asr/online/__init__.py b/paddlespeech/server/tests/asr/online/__init__.py deleted file mode 100644 index 97043fd7..00000000 --- a/paddlespeech/server/tests/asr/online/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddlespeech/server/tests/asr/online/microphone_client.py b/paddlespeech/server/tests/asr/online/microphone_client.py deleted file mode 100644 index 2ceaf6d0..00000000 --- a/paddlespeech/server/tests/asr/online/microphone_client.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -record wave from the mic -""" -import asyncio -import json -import logging -import threading -import wave -from signal import SIGINT -from signal import SIGTERM - -import pyaudio -import websockets - - -class ASRAudioHandler(threading.Thread): - def __init__(self, url="127.0.0.1", port=8091): - threading.Thread.__init__(self) - self.url = url - self.port = port - self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" - self.fileName = "./output.wav" - self.chunk = 5120 - self.format = pyaudio.paInt16 - self.channels = 1 - self.rate = 16000 - self._running = True - self._frames = [] - self.data_backup = [] - - def startrecord(self): - """ - start a new thread to record wave - """ - threading._start_new_thread(self.recording, ()) - - def recording(self): - """ - recording wave - """ - self._running = True - self._frames = [] - p = pyaudio.PyAudio() - stream = p.open( - format=self.format, - channels=self.channels, - rate=self.rate, - input=True, - frames_per_buffer=self.chunk) - while (self._running): - data = stream.read(self.chunk) - self._frames.append(data) - self.data_backup.append(data) - - stream.stop_stream() - stream.close() - p.terminate() - - def save(self): - """ - save wave data - """ - p = pyaudio.PyAudio() - wf = wave.open(self.fileName, 'wb') - wf.setnchannels(self.channels) - wf.setsampwidth(p.get_sample_size(self.format)) - wf.setframerate(self.rate) - wf.writeframes(b''.join(self.data_backup)) - wf.close() - p.terminate() - - def stoprecord(self): - """ - stop recording - """ - self._running = False - - async def run(self): 
- aa = input("是否开始录音? (y/n)") - if aa.strip() == "y": - self.startrecord() - logging.info("*" * 10 + "开始录音,请输入语音") - - async with websockets.connect(self.url) as ws: - # 发送开始指令 - audio_info = json.dumps( - { - "name": "test.wav", - "signal": "start", - "nbest": 5 - }, - sort_keys=True, - indent=4, - separators=(',', ': ')) - await ws.send(audio_info) - msg = await ws.recv() - logging.info("receive msg={}".format(msg)) - - # send bytes data - logging.info("结束录音请: Ctrl + c。继续请按回车。") - try: - while True: - while len(self._frames) > 0: - await ws.send(self._frames.pop(0)) - msg = await ws.recv() - logging.info("receive msg={}".format(msg)) - except asyncio.CancelledError: - # quit - # send finished - audio_info = json.dumps( - { - "name": "test.wav", - "signal": "end", - "nbest": 5 - }, - sort_keys=True, - indent=4, - separators=(',', ': ')) - await ws.send(audio_info) - msg = await ws.recv() - logging.info("receive msg={}".format(msg)) - - self.stoprecord() - logging.info("*" * 10 + "录音结束") - self.save() - elif aa.strip() == "n": - exit() - else: - print("无效输入!") - exit() - - -if __name__ == "__main__": - - logging.basicConfig(level=logging.INFO) - logging.info("asr websocket client start") - - handler = ASRAudioHandler("127.0.0.1", 8091) - loop = asyncio.get_event_loop() - main_task = asyncio.ensure_future(handler.run()) - for signal in [SIGINT, SIGTERM]: - loop.add_signal_handler(signal, main_task.cancel) - try: - loop.run_until_complete(main_task) - finally: - loop.close() - - logging.info("asr websocket client finished") diff --git a/paddlespeech/server/tests/asr/online/websocket_client.py b/paddlespeech/server/tests/asr/online/websocket_client.py deleted file mode 100644 index 49cbd703..00000000 --- a/paddlespeech/server/tests/asr/online/websocket_client.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#!/usr/bin/python -# -*- coding: UTF-8 -*- -import argparse -import asyncio -import codecs -import json -import logging -import os - -import numpy as np -import soundfile -import websockets - - -class ASRAudioHandler: - def __init__(self, url="127.0.0.1", port=8090): - self.url = url - self.port = port - self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" - - def read_wave(self, wavfile_path: str): - samples, sample_rate = soundfile.read(wavfile_path, dtype='int16') - x_len = len(samples) - - chunk_size = 85 * 16 #80ms, sample_rate = 16kHz - if x_len % chunk_size!= 0: - padding_len_x = chunk_size - x_len % chunk_size - else: - padding_len_x = 0 - - padding = np.zeros((padding_len_x), dtype=samples.dtype) - padded_x = np.concatenate([samples, padding], axis=0) - - assert (x_len + padding_len_x) % chunk_size == 0 - num_chunk = (x_len + padding_len_x) / chunk_size - num_chunk = int(num_chunk) - for i in range(0, num_chunk): - start = i * chunk_size - end = start + chunk_size - x_chunk = padded_x[start:end] - yield x_chunk - - async def run(self, wavfile_path: str): - logging.info("send a message to the server") - # self.read_wave() - # send websocket handshake protocal - async with websockets.connect(self.url) as ws: - # server has already received handshake protocal - # client start to send the command - audio_info = json.dumps( - { - "name": "test.wav", - "signal": "start", - "nbest": 5 - }, - sort_keys=True, 
- indent=4, - separators=(',', ': ')) - await ws.send(audio_info) - msg = await ws.recv() - logging.info("receive msg={}".format(msg)) - - # send chunk audio data to engine - for chunk_data in self.read_wave(wavfile_path): - await ws.send(chunk_data.tobytes()) - msg = await ws.recv() - msg = json.loads(msg) - logging.info("receive msg={}".format(msg)) - - # finished - audio_info = json.dumps( - { - "name": "test.wav", - "signal": "end", - "nbest": 5 - }, - sort_keys=True, - indent=4, - separators=(',', ': ')) - await ws.send(audio_info) - msg = await ws.recv() - - # decode the bytes to str - msg = json.loads(msg) - logging.info("final receive msg={}".format(msg)) - result = msg - return result - - -def main(args): - logging.basicConfig(level=logging.INFO) - logging.info("asr websocket client start") - handler = ASRAudioHandler("127.0.0.1", 8090) - loop = asyncio.get_event_loop() - - # support to process single audio file - if args.wavfile and os.path.exists(args.wavfile): - logging.info(f"start to process the wavscp: {args.wavfile}") - result = loop.run_until_complete(handler.run(args.wavfile)) - result = result["asr_results"] - logging.info(f"asr websocket client finished : {result}") - - # support to process batch audios from wav.scp - if args.wavscp and os.path.exists(args.wavscp): - logging.info(f"start to process the wavscp: {args.wavscp}") - with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\ - codecs.open("result.txt", 'w', encoding='utf-8') as w: - for line in f: - utt_name, utt_path = line.strip().split() - result = loop.run_until_complete(handler.run(utt_path)) - result = result["asr_results"] - w.write(f"{utt_name} {result}\n") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--wavfile", - action="store", - help="wav file path ", - default="./16_audio.wav") - parser.add_argument( - "--wavscp", type=str, default=None, help="The batch audios dict text") - args = parser.parse_args() - - main(args) From 
4b76a01c85f54596a26001a1b6dedcec72784238 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Thu, 21 Apr 2022 16:19:01 +0800 Subject: [PATCH 2/3] update en readme.md, test=doc --- demos/streaming_asr_server/README.md | 407 +++++++++++------- .../server/bin/paddlespeech_client.py | 2 +- .../server/tests/asr/online/README.md | 35 ++ .../server/tests/asr/online/README_cn.md | 42 ++ .../tests/asr/online/microphone_client.py | 161 +++++++ 5 files changed, 497 insertions(+), 150 deletions(-) create mode 100644 paddlespeech/server/tests/asr/online/README.md create mode 100644 paddlespeech/server/tests/asr/online/README_cn.md create mode 100644 paddlespeech/server/tests/asr/online/microphone_client.py diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md index 0323d398..68c3b045 100644 --- a/demos/streaming_asr_server/README.md +++ b/demos/streaming_asr_server/README.md @@ -3,7 +3,7 @@ # Speech Server ## Introduction -This demo is an implementation of starting the voice service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. +This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. ## Usage @@ -14,17 +14,16 @@ It is recommended to use **paddlepaddle 2.2.1** or above. You can choose one way from meduim and hard to install paddlespeech. ### 2. Prepare config File -The configuration file can be found in `conf/application.yaml` . -Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `_`. -At present, the speech tasks integrated by the service include: asr (speech recognition), tts (text to sppech) and cls (audio classification). 
-Currently the engine type supports two forms: python and inference (Paddle Inference) +The configuration file can be found in `conf/ws_application.yaml` 和 `conf/ws_conformer_application.yaml`. + +At present, the speech tasks integrated by the model include: DeepSpeech2 and conformer. The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. Here are sample files for thisASR client demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav ``` ### 3. Server Usage @@ -32,7 +31,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ```bash # start the service - paddlespeech_server start --config_file ./conf/application.yaml + paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml ``` Usage: @@ -41,19 +40,72 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee paddlespeech_server start --help ``` Arguments: - - `config_file`: yaml file of the app, defalut: ./conf/application.yaml + - `config_file`: yaml file of the app, defalut: ./conf/ws_conformer_application.yaml - `log_file`: log file. Default: ./log/paddlespeech.log Output: ```bash - [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384] - INFO: Waiting for application startup. - [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup. - INFO: Application startup complete. - [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete. 
- INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) ``` - Python API @@ -62,21 +114,73 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee server_executor = ServerExecutor() server_executor( - config_file="./conf/application.yaml", + config_file="./conf/ws_conformer_application.yaml", log_file="./log/paddlespeech.log") ``` Output: ```bash - INFO: Started server process [529] - [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529] - INFO: Waiting for application startup. 
- [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup. - INFO: Application startup complete. - [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete. - INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. 
+ await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) ``` @@ -84,13 +188,13 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee **Note:** The response time will be slightly longer when using the client for the first time - Command Line (Recommended) ``` - paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav ``` Usage: ```bash - paddlespeech_client asr --help + paddlespeech_client asr_online --help ``` Arguments: - `server_ip`: server ip. Default: 127.0.0.1 @@ -102,8 +206,69 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ```bash - [2022-02-23 18:11:22,819] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} - [2022-02-23 18:11:22,820] [ INFO] - time cost 0.689145 s. 
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] 
[ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive 
msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s. ``` @@ -125,122 +290,66 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ```bash - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} - ``` - -### 5. TTS Client Usage -**Note:** The response time will be slightly longer when using the client for the first time -- Command Line (Recommended) - ```bash - paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav - ``` - Usage: - - ```bash - paddlespeech_client tts --help - ``` - Arguments: - - `server_ip`: server ip. Default: 127.0.0.1 - - `port`: server port. Default: 8090 - - `input`(required): Input text to generate. - - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0 - - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0 - - `volume`: Audio volume, the value should be set between 0 and 3. 
Default: 1.0 - - `sample_rate`: Sampling rate, choice: [0, 8000, 16000], the default is the same as the model. Default: 0 - - `output`: Output wave filepath. Default: None, which means not to save the audio to the local. - - Output: - ```bash - [2022-02-23 15:20:37,875] [ INFO] - {'description': 'success.'} - [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav. - [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s. - [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s. - - ``` - -- Python API - ```python - from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor - import json - - ttsclient_executor = TTSClientExecutor() - res = ttsclient_executor( - input="您好,欢迎使用百度飞桨语音合成服务。", - server_ip="127.0.0.1", - port=8090, - spk_id=0, - speed=1.0, - volume=1.0, - sample_rate=0, - output="./output.wav") - - response_dict = res.json() - print(response_dict["message"]) - print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path'])) - print("Audio duration: %f s." %(response_dict['result']['duration'])) - ``` - - Output: - ```bash - {'description': 'success.'} - Save synthesized audio successfully on ./output.wav. - Audio duration: 3.612500 s. - - ``` - -### 6. CLS Client Usage -**Note:** The response time will be slightly longer when using the client for the first time -- Command Line (Recommended) - ``` - paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav - ``` - - Usage: - - ```bash - paddlespeech_client cls --help - ``` - Arguments: - - `server_ip`: server ip. Default: 127.0.0.1 - - `port`: server port. Default: 8090 - - `input`(required): Audio file to be classified. - - `topk`: topk scores of classification result. 
- - Output: - ```bash - [2022-03-09 20:44:39,974] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} - [2022-03-09 20:44:39,975] [ INFO] - Response time 0.104360 s. - - - ``` - -- Python API - ```python - from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor - import json - - clsclient_executor = CLSClientExecutor() - res = clsclient_executor( - input="./zh.wav", - server_ip="127.0.0.1", - port=8090, - topk=1) - print(res.json()) - ``` - - Output: - ```bash - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} - - ``` - - -## Models supported by the service -### ASR model -Get all models supported by the ASR service via `paddlespeech_server stats --task asr`, where static models can be used for paddle inference inference. - -### TTS model -Get all models supported by the TTS service via `paddlespeech_server stats --task tts`, where static models can be used for paddle inference inference. - -### CLS model -Get all models supported by the CLS service via `paddlespeech_server stats --task cls`, where static models can be used for paddle inference inference. 
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] 
[ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive 
msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + ``` \ No newline at end of file diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 522c7863..d7858be6 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -35,7 +35,7 @@ from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 __all__ = [ - 'TTSClientExecutor', 'ASRClientExecutor', 'ASRClientExecutor', + 'TTSClientExecutor', 'ASRClientExecutor', 'ASROnlineClientExecutor', 'CLSClientExecutor' ] diff --git a/paddlespeech/server/tests/asr/online/README.md b/paddlespeech/server/tests/asr/online/README.md new file mode 100644 index 00000000..e1e4d950 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/README.md @@ -0,0 +1,35 @@ +([简体中文](./README_cn.md)|English) + +# Speech Service + +## Introduction + +This document introduces a client for streaming asr service: microphone + + +## Usage +### 1. 
Install +Refer to [Install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + + +It is recommended to use **paddlepaddle 2.2.1** or above. +You can choose one way from medium and hard to install paddlespeech. + + +### 2. Prepare Test File + + +The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. + +Here are sample files for this ASR client demo that can be downloaded: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. Streaming ASR Client Usage + +- microphone + ``` + python microphone_client.py + + ``` diff --git a/paddlespeech/server/tests/asr/online/README_cn.md b/paddlespeech/server/tests/asr/online/README_cn.md new file mode 100644 index 00000000..46dff250 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/README_cn.md @@ -0,0 +1,42 @@ +([English](./README.md)|中文) + +# 语音服务 + +## 介绍 +本文档介绍如何使用流式ASR的一种不同客户端:麦克风。 + + +## 使用方法 +### 1. 安装 +请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +推荐使用 **paddlepaddle 2.2.1** 或以上版本。 +你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。 + + +### 2. 准备测试文件 + +这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 + +可以下载此 ASR client的示例音频: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. 流式 ASR 客户端使用方法 + +- Python模拟流式服务命令行 + ``` + + # 流式ASR + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav + + ``` + + +- 麦克风 + ``` + # 直接调用麦克风设备 + python microphone_client.py + + ``` diff --git a/paddlespeech/server/tests/asr/online/microphone_client.py b/paddlespeech/server/tests/asr/online/microphone_client.py new file mode 100644 index 00000000..2ceaf6d0 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/microphone_client.py @@ -0,0 +1,161 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +record wave from the mic +""" +import asyncio +import json +import logging +import threading +import wave +from signal import SIGINT +from signal import SIGTERM + +import pyaudio +import websockets + + +class ASRAudioHandler(threading.Thread): + def __init__(self, url="127.0.0.1", port=8091): + threading.Thread.__init__(self) + self.url = url + self.port = port + self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" + self.fileName = "./output.wav" + self.chunk = 5120 + self.format = pyaudio.paInt16 + self.channels = 1 + self.rate = 16000 + self._running = True + self._frames = [] + self.data_backup = [] + + def startrecord(self): + """ + start a new thread to record wave + """ + threading._start_new_thread(self.recording, ()) + + def recording(self): + """ + recording wave + """ + self._running = True + self._frames = [] + p = pyaudio.PyAudio() + stream = p.open( + format=self.format, + channels=self.channels, + rate=self.rate, + input=True, + frames_per_buffer=self.chunk) + while (self._running): + data = stream.read(self.chunk) + self._frames.append(data) + self.data_backup.append(data) + + stream.stop_stream() + stream.close() + p.terminate() + + def save(self): + """ + save wave data + """ + p = pyaudio.PyAudio() + wf = wave.open(self.fileName, 'wb') + wf.setnchannels(self.channels) + wf.setsampwidth(p.get_sample_size(self.format)) + wf.setframerate(self.rate) + 
wf.writeframes(b''.join(self.data_backup)) + wf.close() + p.terminate() + + def stoprecord(self): + """ + stop recording + """ + self._running = False + + async def run(self): + aa = input("是否开始录音? (y/n)") + if aa.strip() == "y": + self.startrecord() + logging.info("*" * 10 + "开始录音,请输入语音") + + async with websockets.connect(self.url) as ws: + # 发送开始指令 + audio_info = json.dumps( + { + "name": "test.wav", + "signal": "start", + "nbest": 5 + }, + sort_keys=True, + indent=4, + separators=(',', ': ')) + await ws.send(audio_info) + msg = await ws.recv() + logging.info("receive msg={}".format(msg)) + + # send bytes data + logging.info("结束录音请: Ctrl + c。继续请按回车。") + try: + while True: + while len(self._frames) > 0: + await ws.send(self._frames.pop(0)) + msg = await ws.recv() + logging.info("receive msg={}".format(msg)) + except asyncio.CancelledError: + # quit + # send finished + audio_info = json.dumps( + { + "name": "test.wav", + "signal": "end", + "nbest": 5 + }, + sort_keys=True, + indent=4, + separators=(',', ': ')) + await ws.send(audio_info) + msg = await ws.recv() + logging.info("receive msg={}".format(msg)) + + self.stoprecord() + logging.info("*" * 10 + "录音结束") + self.save() + elif aa.strip() == "n": + exit() + else: + print("无效输入!") + exit() + + +if __name__ == "__main__": + + logging.basicConfig(level=logging.INFO) + logging.info("asr websocket client start") + + handler = ASRAudioHandler("127.0.0.1", 8091) + loop = asyncio.get_event_loop() + main_task = asyncio.ensure_future(handler.run()) + for signal in [SIGINT, SIGTERM]: + loop.add_signal_handler(signal, main_task.cancel) + try: + loop.run_until_complete(main_task) + finally: + loop.close() + + logging.info("asr websocket client finished") From 56751a1ed549b195172ecaec4681819ee3c4e4c4 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Thu, 21 Apr 2022 16:44:58 +0800 Subject: [PATCH 3/3] update the server device to paddle.device, test=doc --- demos/streaming_asr_server/conf/ws_conformer_application.yaml | 2 +- 
paddlespeech/server/conf/ws_conformer_application.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml index 2a76ee39..8f011485 100644 --- a/demos/streaming_asr_server/conf/ws_conformer_application.yaml +++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml @@ -29,7 +29,7 @@ asr_online: cfg_path: decode_method: force_yes: True - device: 'cpu' # cpu or gpu:id + device: # cpu or gpu:id am_predictor_conf: device: # set 'gpu:id' or 'cpu' switch_ir_optim: True diff --git a/paddlespeech/server/conf/ws_conformer_application.yaml b/paddlespeech/server/conf/ws_conformer_application.yaml index 2a76ee39..9c042534 100644 --- a/paddlespeech/server/conf/ws_conformer_application.yaml +++ b/paddlespeech/server/conf/ws_conformer_application.yaml @@ -29,7 +29,7 @@ asr_online: cfg_path: decode_method: force_yes: True - device: 'cpu' # cpu or gpu:id + device: # cpu or gpu:id am_predictor_conf: device: # set 'gpu:id' or 'cpu' switch_ir_optim: True