diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md new file mode 100644 index 00000000..68c3b045 --- /dev/null +++ b/demos/streaming_asr_server/README.md @@ -0,0 +1,355 @@ +([简体中文](./README_cn.md)|English) + +# Speech Server + +## Introduction +This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. + + +## Usage +### 1. Installation +See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +It is recommended to use **paddlepaddle 2.2.1** or above. +You can choose either the medium or the hard way to install paddlespeech. + +### 2. Prepare config File +The configuration file can be found in `conf/ws_application.yaml` and `conf/ws_conformer_application.yaml`. + +At present, the models integrated in the service include: DeepSpeech2 and conformer. + + +The input of ASR client demo should be a WAV file (`.wav`), and the sample rate must be the same as that of the model. + +Here is a sample file for this ASR client demo that can be downloaded: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. Server Usage +- Command Line (Recommended) + + ```bash + # start the service + paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml + ``` + + Usage: + + ```bash + paddlespeech_server start --help + ``` + Arguments: + - `config_file`: yaml file of the app, default: ./conf/ws_conformer_application.yaml + - `log_file`: log file. 
Default: ./log/paddlespeech.log + + Output: + ```bash + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. 
+ await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_server import ServerExecutor + + server_executor = ServerExecutor() + server_executor( + config_file="./conf/ws_conformer_application.yaml", + log_file="./log/paddlespeech.log") + ``` + + Output: + ```bash + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + ``` + + +### 4. 
ASR Client Usage +**Note:** The response time will be slightly longer when using the client for the first time. +- Command Line (Recommended) + ``` + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + ``` + + Usage: + + ```bash + paddlespeech_client asr_online --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Audio file to be recognized. + - `sample_rate`: Audio sampling rate, default: 16000. + - `lang`: Language. Default: "zh_cn". + - `audio_format`: Audio format. Default: "wav". + + Output: + ```bash + [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + 
[2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 
15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s. 
+ + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor + import json + + asrclient_executor = ASROnlineClientExecutor() + res = asrclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + sample_rate=16000, + lang="zh_cn", + audio_format="wav") + print(res.json()) + ``` + + Output: + ```bash + [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive 
msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] 
- receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + ``` \ No newline at end of file diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md new file mode 100644 index 00000000..c58e17e9 --- /dev/null +++ b/demos/streaming_asr_server/README_cn.md @@ -0,0 +1,356 @@ +([English](./README.md)|中文) + +# 语音服务 + +## 介绍 +这个demo是一个启动流式语音服务和访问服务的实现。 它可以通过使用`paddlespeech_server` 和 `paddlespeech_client`的单个命令或 python 的几行代码来实现。 + + +## 使用方法 +### 1. 安装 +请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +推荐使用 **paddlepaddle 2.2.1** 或以上版本。 +你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。 + + +### 2. 
准备配置文件 +配置文件可参见 `conf/ws_application.yaml` 和 `conf/ws_conformer_application.yaml` 。 +目前服务集成的模型有: DeepSpeech2和conformer模型。 + + +这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 + +可以下载此 ASR client的示例音频: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. 服务端使用方法 +- 命令行 (推荐使用) + + ```bash + # 启动服务 + paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml + ``` + + 使用方法: + + ```bash + paddlespeech_server start --help + ``` + 参数: + - `config_file`: 服务的配置文件,默认: ./conf/ws_conformer_application.yaml + - `log_file`: log 文件. 默认:./log/paddlespeech.log + + 输出: + ```bash + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... 
+ [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. 
+ [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_server import ServerExecutor + + server_executor = ServerExecutor() + server_executor( + config_file="./conf/ws_conformer_application.yaml", + log_file="./log/paddlespeech.log") + ``` + + 输出: + ```bash + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... 
+ [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. 
+ [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + ``` + +### 4. ASR 客户端使用方法 +**注意:** 初次使用客户端时响应时间会略长 +- 命令行 (推荐使用) + ``` + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + + ``` + + 使用帮助: + + ```bash + paddlespeech_client asr_online --help + ``` + + 参数: + - `server_ip`: 服务端ip地址,默认: 127.0.0.1。 + - `port`: 服务端口,默认: 8090。 + - `input`(必须输入): 用于识别的音频文件。 + - `sample_rate`: 音频采样率,默认值:16000。 + - `lang`: 模型语言,默认值:zh_cn。 + - `audio_format`: 音频格式,默认值:wav。 + + 输出: + + ```bash + [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] 
- receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': 
'我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ 
INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s. + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor + import json + + asrclient_executor = ASROnlineClientExecutor() + res = asrclient_executor( + input="./zh.wav", + server_ip="127.0.0.1", + port=8090, + sample_rate=16000, + lang="zh_cn", + audio_format="wav") + print(res.json()) + ``` + + Output: + ```bash + [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 
15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - 
receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + ``` \ No newline at end of file diff --git a/demos/streaming_asr_server/conf/ws_application.yaml b/demos/streaming_asr_server/conf/ws_application.yaml new file mode 100644 index 00000000..dee8d78b --- /dev/null +++ b/demos/streaming_asr_server/conf/ws_application.yaml @@ -0,0 +1,47 @@ +# This is the parameter configuration file for PaddleSpeech Serving. 
+ +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_online', 'tts_online'] +# protocol = ['websocket', 'http'] (only one can be selected). +# websocket only supports the online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'deepspeech2online_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + frame_duration_ms: 80 + shift_ms: 40 # NOTE(review): 'shift_ms' is defined twice in this mapping (40 here, 10 below); confirm the intended value + sample_rate: 16000 + sample_width: 2 + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 20 # ms + shift_ms: 10 # ms diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml new file mode 100644 index 00000000..8f011485 --- /dev/null +++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml @@ -0,0 +1,45 @@ +# This is the parameter configuration file for PaddleSpeech Serving. 
+ +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_online', 'tts_online'] +# protocol = ['websocket', 'http'] (only one can be selected). +# websocket only supports the online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_multicn' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: # cpu or gpu:id + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 \ No newline at end of file diff --git a/demos/streaming_asr_server/run.sh b/demos/streaming_asr_server/run.sh new file mode 100644 index 00000000..d2ca3447 --- /dev/null +++ b/demos/streaming_asr_server/run.sh @@ -0,0 +1,2 @@ +# start the streaming asr service +paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml \ No newline at end of file diff --git a/demos/streaming_asr_server/test.sh b/demos/streaming_asr_server/test.sh new file mode 100644 index 00000000..fe8155cf --- /dev/null +++ b/demos/streaming_asr_server/test.sh @@ -0,0 +1,5 
@@ +# download the test wav +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav + +# read the wav and pass it to service +python3 websocket_client.py --wavfile ./zh.wav diff --git a/paddlespeech/server/tests/asr/online/web/app.py b/demos/streaming_asr_server/web/app.py similarity index 100% rename from paddlespeech/server/tests/asr/online/web/app.py rename to demos/streaming_asr_server/web/app.py diff --git a/paddlespeech/server/tests/asr/online/web/paddle_web_demo.png b/demos/streaming_asr_server/web/paddle_web_demo.png similarity index 100% rename from paddlespeech/server/tests/asr/online/web/paddle_web_demo.png rename to demos/streaming_asr_server/web/paddle_web_demo.png diff --git a/paddlespeech/server/tests/asr/online/web/readme.md b/demos/streaming_asr_server/web/readme.md similarity index 100% rename from paddlespeech/server/tests/asr/online/web/readme.md rename to demos/streaming_asr_server/web/readme.md diff --git a/paddlespeech/server/tests/asr/online/web/static/css/font-awesome.min.css b/demos/streaming_asr_server/web/static/css/font-awesome.min.css similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/css/font-awesome.min.css rename to demos/streaming_asr_server/web/static/css/font-awesome.min.css diff --git a/paddlespeech/server/tests/asr/online/web/static/css/style.css b/demos/streaming_asr_server/web/static/css/style.css similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/css/style.css rename to demos/streaming_asr_server/web/static/css/style.css diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/FontAwesome.otf b/demos/streaming_asr_server/web/static/fonts/FontAwesome.otf similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/FontAwesome.otf rename to demos/streaming_asr_server/web/static/fonts/FontAwesome.otf diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.eot 
b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.eot similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.eot rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.eot diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.svg b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.svg similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.svg rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.svg diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.ttf b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.ttf similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.ttf rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.ttf diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff diff --git a/paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff2 b/demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff2 similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/fonts/fontawesome-webfont.woff2 rename to demos/streaming_asr_server/web/static/fonts/fontawesome-webfont.woff2 diff --git a/paddlespeech/server/tests/asr/online/web/static/image/PaddleSpeech_logo.png b/demos/streaming_asr_server/web/static/image/PaddleSpeech_logo.png similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/image/PaddleSpeech_logo.png rename to 
demos/streaming_asr_server/web/static/image/PaddleSpeech_logo.png diff --git a/paddlespeech/server/tests/asr/online/web/static/image/voice-dictation.svg b/demos/streaming_asr_server/web/static/image/voice-dictation.svg similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/image/voice-dictation.svg rename to demos/streaming_asr_server/web/static/image/voice-dictation.svg diff --git a/paddlespeech/server/tests/asr/online/web/static/js/SoundRecognizer.js b/demos/streaming_asr_server/web/static/js/SoundRecognizer.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/SoundRecognizer.js rename to demos/streaming_asr_server/web/static/js/SoundRecognizer.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/jquery-3.2.1.min.js b/demos/streaming_asr_server/web/static/js/jquery-3.2.1.min.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/jquery-3.2.1.min.js rename to demos/streaming_asr_server/web/static/js/jquery-3.2.1.min.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/mp3.js b/demos/streaming_asr_server/web/static/js/recorder/engine/mp3.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/mp3.js rename to demos/streaming_asr_server/web/static/js/recorder/engine/mp3.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/pcm.js b/demos/streaming_asr_server/web/static/js/recorder/engine/pcm.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/pcm.js rename to demos/streaming_asr_server/web/static/js/recorder/engine/pcm.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/wav.js b/demos/streaming_asr_server/web/static/js/recorder/engine/wav.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/engine/wav.js rename to 
demos/streaming_asr_server/web/static/js/recorder/engine/wav.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/frequency.histogram.view.js b/demos/streaming_asr_server/web/static/js/recorder/extensions/frequency.histogram.view.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/frequency.histogram.view.js rename to demos/streaming_asr_server/web/static/js/recorder/extensions/frequency.histogram.view.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/lib.fft.js b/demos/streaming_asr_server/web/static/js/recorder/extensions/lib.fft.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/extensions/lib.fft.js rename to demos/streaming_asr_server/web/static/js/recorder/extensions/lib.fft.js diff --git a/paddlespeech/server/tests/asr/online/web/static/js/recorder/recorder-core.js b/demos/streaming_asr_server/web/static/js/recorder/recorder-core.js similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/js/recorder/recorder-core.js rename to demos/streaming_asr_server/web/static/js/recorder/recorder-core.js diff --git a/paddlespeech/server/tests/asr/online/web/static/paddle.ico b/demos/streaming_asr_server/web/static/paddle.ico similarity index 100% rename from paddlespeech/server/tests/asr/online/web/static/paddle.ico rename to demos/streaming_asr_server/web/static/paddle.ico diff --git a/paddlespeech/server/tests/asr/online/web/templates/index.html b/demos/streaming_asr_server/web/templates/index.html similarity index 100% rename from paddlespeech/server/tests/asr/online/web/templates/index.html rename to demos/streaming_asr_server/web/templates/index.html diff --git a/demos/streaming_asr_server/websocket_client.py b/demos/streaming_asr_server/websocket_client.py new file mode 100644 index 00000000..2a15096c --- /dev/null +++ b/demos/streaming_asr_server/websocket_client.py @@ 
-0,0 +1,62 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#!/usr/bin/python +# -*- coding: UTF-8 -*- +import argparse +import asyncio +import codecs +import logging +import os + +from paddlespeech.cli.log import logger +from paddlespeech.server.utils.audio_handler import ASRAudioHandler + + +def main(args): + logger.info("asr websocket client start") + handler = ASRAudioHandler("127.0.0.1", 8090) + loop = asyncio.get_event_loop() + + # support to process single audio file + if args.wavfile and os.path.exists(args.wavfile): + logger.info(f"start to process the wavscp: {args.wavfile}") + result = loop.run_until_complete(handler.run(args.wavfile)) + result = result["asr_results"] + logger.info(f"asr websocket client finished : {result}") + + # support to process batch audios from wav.scp + if args.wavscp and os.path.exists(args.wavscp): + logging.info(f"start to process the wavscp: {args.wavscp}") + with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\ + codecs.open("result.txt", 'w', encoding='utf-8') as w: + for line in f: + utt_name, utt_path = line.strip().split() + result = loop.run_until_complete(handler.run(utt_path)) + result = result["asr_results"] + w.write(f"{utt_name} {result}\n") + + +if __name__ == "__main__": + logger.info("Start to do streaming asr client") + parser = argparse.ArgumentParser() + parser.add_argument( + "--wavfile", + action="store", + help="wav file path ", + 
default="./16_audio.wav") + parser.add_argument( + "--wavscp", type=str, default=None, help="The batch audios dict text") + args = parser.parse_args() + + main(args) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index a7d00f24..bd461fd7 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -8,7 +8,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | :-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----: | :-----: | :-----: [Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 345 MB | 2 Conv + 5 LSTM layers with only forward direction | 0.078 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_aishell_ckpt_0.1.1.model.tar.gz)| Aishell Dataset | Char-based | 306 MB | 2 Conv + 3 bidirectional GRU layers| 0.064 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) -[Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.1.2.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0565 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) +[Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention| 0.0534 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | 
Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0483 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) [Ds2 Offline Librispeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr0/asr0_deepspeech2_librispeech_ckpt_0.1.1.model.tar.gz)| Librispeech Dataset | Char-based | 518 MB | 2 Conv + 3 bidirectional LSTM layers| - |0.0725| 960 h | [Ds2 Offline Librispeech ASR0](../../examples/librispeech/asr0) diff --git a/examples/aishell/asr1/RESULTS.md b/examples/aishell/asr1/RESULTS.md index 73cd57bd..7730baf1 100644 --- a/examples/aishell/asr1/RESULTS.md +++ b/examples/aishell/asr1/RESULTS.md @@ -18,10 +18,10 @@ Need set `decoding.decoding_chunk_size=16` when decoding. 
| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | CER | | --- | --- | --- | --- | --- | --- | --- | --- | --- | -| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | attention | 16, -1 | - | 0.0573884 | -| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | ctc_greedy_search | 16, -1 | - | 0.06599091 | -| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | ctc_prefix_beam_search | 16, -1 | - | 0.065991 | -| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | attention_rescoring | 16, -1 | - | 0.056502 | +| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | attention | 16, -1 | - | 0.0534 | +| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | ctc_greedy_search | 16, -1 | - | 0.0629 | +| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | ctc_prefix_beam_search | 16, -1 | - | 0.0629 | +| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | attention_rescoring | 16, -1 | - | 0.0544 | ## Transformer diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 3ea14ab3..d7858be6 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -30,11 +30,14 @@ from ..executor import BaseExecutor from ..util import cli_client_register from ..util import stats_wrapper from paddlespeech.cli.log import logger -from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler +from paddlespeech.server.utils.audio_handler import ASRAudioHandler from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 -__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor'] +__all__ = [ + 'TTSClientExecutor', 'ASRClientExecutor', 'ASROnlineClientExecutor', + 'CLSClientExecutor' +] @cli_client_register( @@ -236,11 +239,11 @@ class 
ASRClientExecutor(BaseExecutor): @cli_client_register( name='paddlespeech_client.asr_online', description='visit asr online service') -class ASRClientExecutor(BaseExecutor): +class ASROnlineClientExecutor(BaseExecutor): def __init__(self): - super(ASRClientExecutor, self).__init__() + super(ASROnlineClientExecutor, self).__init__() self.parser = argparse.ArgumentParser( - prog='paddlespeech_client.asr', add_help=True) + prog='paddlespeech_client.asr_online', add_help=True) self.parser.add_argument( '--server_ip', type=str, default='127.0.0.1', help='server ip') self.parser.add_argument( diff --git a/paddlespeech/server/conf/ws_conformer_application.yaml b/paddlespeech/server/conf/ws_conformer_application.yaml index e14833de..9c042534 100644 --- a/paddlespeech/server/conf/ws_conformer_application.yaml +++ b/paddlespeech/server/conf/ws_conformer_application.yaml @@ -29,7 +29,7 @@ asr_online: cfg_path: decode_method: force_yes: True - + device: # cpu or gpu:id am_predictor_conf: device: # set 'gpu:id' or 'cpu' switch_ir_optim: True diff --git a/paddlespeech/server/engine/asr/online/asr_engine.py b/paddlespeech/server/engine/asr/online/asr_engine.py index 758cbaab..10e72024 100644 --- a/paddlespeech/server/engine/asr/online/asr_engine.py +++ b/paddlespeech/server/engine/asr/online/asr_engine.py @@ -1028,6 +1028,17 @@ class ASREngine(BaseEngine): self.output = "" self.executor = ASRServerExecutor() self.config = config + try: + if self.config.get("device", None): + self.device = self.config.device + else: + self.device = paddle.get_device() + logger.info(f"paddlespeech_server set the device: {self.device}") + paddle.set_device(self.device) + except BaseException: + logger.error( + "Set device failed, please check if device is already used and the parameter 'device' in the yaml file" + ) self.executor._init_from_path( model_type=self.config.model_type, diff --git a/paddlespeech/server/tests/asr/online/README.md b/paddlespeech/server/tests/asr/online/README.md new file 
mode 100644 index 00000000..e1e4d950 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/README.md @@ -0,0 +1,35 @@ +([简体中文](./README_cn.md)|English) + +# Speech Service + +## Introduction + +This document introduces a client for the streaming ASR service: the microphone client. + + +## Usage +### 1. Install +Refer to [Install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + + +It is recommended to use **paddlepaddle 2.2.1** or above. +You can choose either the medium or the hard way to install paddlespeech. + + +### 2. Prepare config File + + +The input of the ASR client demo should be a WAV file (`.wav`), and its sample rate must be the same as the model's. + +Here is a sample file for this ASR client demo that can be downloaded: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. Streaming ASR Client Usage + +- microphone + ``` + python microphone_client.py + + ``` diff --git a/paddlespeech/server/tests/asr/online/README_cn.md b/paddlespeech/server/tests/asr/online/README_cn.md index 3e7d7a15..46dff250 100644 --- a/paddlespeech/server/tests/asr/online/README_cn.md +++ b/paddlespeech/server/tests/asr/online/README_cn.md @@ -1,9 +1,9 @@ -([简体中文](./README_cn.md)|English) +([English](./README.md)|中文) # 语音服务 ## 介绍 -本文档介绍如何使用流式ASR的三种不同客户端:网页、麦克风、Python模拟流式服务。 +本文档介绍如何使用流式ASR的一种不同客户端:麦克风。 ## 使用方法 @@ -20,7 +20,7 @@ 可以下载此 ASR client的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav ``` ### 2. 
流式 ASR 客户端使用方法 @@ -40,10 +40,3 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee python microphone_client.py ``` - - -- 网页 - ``` - # 进入web目录后参考相关readme.md - - ``` diff --git a/paddlespeech/server/tests/asr/online/__init__.py b/paddlespeech/server/tests/asr/online/__init__.py deleted file mode 100644 index 97043fd7..00000000 --- a/paddlespeech/server/tests/asr/online/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddlespeech/server/tests/asr/online/websocket_client.py b/paddlespeech/server/tests/asr/online/websocket_client.py deleted file mode 100644 index 015698f5..00000000 --- a/paddlespeech/server/tests/asr/online/websocket_client.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#!/usr/bin/python -# -*- coding: UTF-8 -*- -import argparse -import asyncio -import codecs -import json -import logging -import os - -import numpy as np -import soundfile -import websockets - - -class ASRAudioHandler: - def __init__(self, url="127.0.0.1", port=8090): - self.url = url - self.port = port - self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr" - - def read_wave(self, wavfile_path: str): - samples, sample_rate = soundfile.read(wavfile_path, dtype='int16') - x_len = len(samples) - - chunk_size = 85 * 16 #80ms, sample_rate = 16kHz - if x_len % chunk_size != 0: - padding_len_x = chunk_size - x_len % chunk_size - else: - padding_len_x = 0 - - padding = np.zeros((padding_len_x), dtype=samples.dtype) - padded_x = np.concatenate([samples, padding], axis=0) - - assert (x_len + padding_len_x) % chunk_size == 0 - num_chunk = (x_len + padding_len_x) / chunk_size - num_chunk = int(num_chunk) - for i in range(0, num_chunk): - start = i * chunk_size - end = start + chunk_size - x_chunk = padded_x[start:end] - yield x_chunk - - async def run(self, wavfile_path: str): - logging.info("send a message to the server") - # self.read_wave() - # send websocket handshake protocal - async with websockets.connect(self.url) as ws: - # server has already received handshake protocal - # client start to send the command - audio_info = json.dumps( - { - "name": "test.wav", - "signal": "start", - "nbest": 5 - }, - sort_keys=True, - indent=4, - separators=(',', ': ')) - await ws.send(audio_info) - msg = await ws.recv() - logging.info("receive msg={}".format(msg)) - - # send chunk audio data to engine - for chunk_data in self.read_wave(wavfile_path): - await ws.send(chunk_data.tobytes()) - msg = await ws.recv() - msg = json.loads(msg) - logging.info("receive msg={}".format(msg)) - - # finished - audio_info = json.dumps( - { - "name": "test.wav", - "signal": "end", - "nbest": 5 - }, - sort_keys=True, - indent=4, - separators=(',', ': ')) - await ws.send(audio_info) - msg = 
await ws.recv() - - # decode the bytes to str - msg = json.loads(msg) - logging.info("final receive msg={}".format(msg)) - result = msg - return result - - -def main(args): - logging.basicConfig(level=logging.INFO) - logging.info("asr websocket client start") - handler = ASRAudioHandler("127.0.0.1", 8090) - loop = asyncio.get_event_loop() - - # support to process single audio file - if args.wavfile and os.path.exists(args.wavfile): - logging.info(f"start to process the wavscp: {args.wavfile}") - result = loop.run_until_complete(handler.run(args.wavfile)) - result = result["asr_results"] - logging.info(f"asr websocket client finished : {result}") - - # support to process batch audios from wav.scp - if args.wavscp and os.path.exists(args.wavscp): - logging.info(f"start to process the wavscp: {args.wavscp}") - with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\ - codecs.open("result.txt", 'w', encoding='utf-8') as w: - for line in f: - utt_name, utt_path = line.strip().split() - result = loop.run_until_complete(handler.run(utt_path)) - result = result["asr_results"] - w.write(f"{utt_name} {result}\n") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--wavfile", - action="store", - help="wav file path ", - default="./16_audio.wav") - parser.add_argument( - "--wavscp", type=str, default=None, help="The batch audios dict text") - args = parser.parse_args() - - main(args)