From 7e88f2bf11698b5a13782c4f771776fe31ca0dd7 Mon Sep 17 00:00:00 2001
From: xiongxinlei
Date: Wed, 27 Apr 2022 12:22:33 +0800
Subject: [PATCH 1/3] update streaming asr readme, test=doc

---
 demos/streaming_asr_server/README.md       | 10 +++--
 demos/streaming_asr_server/README_cn.md    | 14 +++---
 .../conf/application.yaml                  | 45 +++++++++++++++++++
 .../conf/ws_application.yaml               |  4 +-
 .../conf/ws_conformer_application.yaml     |  4 +-
 5 files changed, 63 insertions(+), 14 deletions(-)
 create mode 100644 demos/streaming_asr_server/conf/application.yaml

diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md
index 6a2f21aa..83b8e05c 100644
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
@@ -5,6 +5,7 @@
 ## Introduction
 This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python.
+Streaming ASR server only supports `websocket` protocol, and doesn't support `http` protocol.
 
 ## Usage
 ### 1. Installation
@@ -114,7 +115,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
     server_executor = ServerExecutor()
     server_executor(
-        config_file="./conf/ws_conformer_application.yaml",
+        config_file="./conf/ws_conformer_application.yaml",
         log_file="./log/paddlespeech.log")
     ```
@@ -188,7 +189,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
   **Note:** The response time will be slightly longer when using the client for the first time
   - Command Line (Recommended)
     ```
-    paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
+    paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav --protocol websocket
     ```
 
     Usage:
@@ -284,8 +285,9 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
         port=8090,
         sample_rate=16000,
         lang="zh_cn",
-        audio_format="wav")
-    print(res.json())
+        audio_format="wav",
+        protocol="websocket")
+    print(res)
     ```
 
     Output:

diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md
index 9224206b..9e5473fe 100644
--- a/demos/streaming_asr_server/README_cn.md
+++ b/demos/streaming_asr_server/README_cn.md
@@ -5,13 +5,14 @@
 ## 介绍
 这个demo是一个启动流式语音服务和访问服务的实现。 它可以通过使用`paddlespeech_server` 和 `paddlespeech_client`的单个命令或 python 的几行代码来实现。
+流式语音识别服务只支持 `websocket` 协议,不支持 `http` 协议。
 
 ## 使用方法
 ### 1. 安装
 请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
 
 推荐使用 **paddlepaddle 2.2.1** 或以上版本。
-你可以从 medium,hard 三中方式中选择一种方式安装 PaddleSpeech。
+你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。
 
 ### 2. 准备配置文件
@@ -187,7 +188,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
   **注意:** 初次使用客户端时响应时间会略长
   - 命令行 (推荐使用)
     ```
-    paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
+    paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav --protocol websocket
     ```
 
@@ -275,18 +276,19 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
   - Python API
     ```python
-    from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor
+    from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor
     import json
 
-    asrclient_executor = ASROnlineClientExecutor()
+    asrclient_executor = ASRClientExecutor()
     res = asrclient_executor(
         input="./zh.wav",
         server_ip="127.0.0.1",
         port=8090,
         sample_rate=16000,
         lang="zh_cn",
-        audio_format="wav")
-    print(res.json())
+        audio_format="wav",
+        protocol="websocket")
+    print(res)
     ```
 
     输出:

diff --git a/demos/streaming_asr_server/conf/application.yaml b/demos/streaming_asr_server/conf/application.yaml
new file mode 100644
index 00000000..50c7a727
--- /dev/null
+++ b/demos/streaming_asr_server/conf/application.yaml
@@ -0,0 +1,45 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                               SERVER SETTING                                  #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engin_list is: <speech task>_<engine type>
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
+# websocket only support online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'conformer_online_multicn'
+    am_model:   # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method:
+    force_yes: True
+    device:     # cpu or gpu:id
+    am_predictor_conf:
+        device:     # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False    # True -> print glog
+        summary: True       # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        window_n: 7     # frame
+        shift_n: 4      # frame
+        window_ms: 25   # ms
+        shift_ms: 10    # ms
+        sample_rate: 16000
+        sample_width: 2
\ No newline at end of file

diff --git a/demos/streaming_asr_server/conf/ws_application.yaml b/demos/streaming_asr_server/conf/ws_application.yaml
index dee8d78b..fc02f2ca 100644
--- a/demos/streaming_asr_server/conf/ws_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_application.yaml
@@ -7,8 +7,8 @@ host: 0.0.0.0
 port: 8090
 
 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['asr_online', 'tts_online']
-# protocol = ['websocket', 'http'] (only one can be selected).
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
 # websocket only support online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']

diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
index 8f011485..50c7a727 100644
--- a/demos/streaming_asr_server/conf/ws_conformer_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
@@ -7,8 +7,8 @@ host: 0.0.0.0
 port: 8090
 
 # The task format in the engin_list is: <speech task>_<engine type>
-# task choices = ['asr_online', 'tts_online']
-# protocol = ['websocket', 'http'] (only one can be selected).
+# task choices = ['asr_online']
+# protocol = ['websocket'] (only one can be selected).
 # websocket only support online engine type.
 protocol: 'websocket'
 engine_list: ['asr_online']

From cb9beabacedb2ae1f2cad6fbc7d0005f93eabe6e Mon Sep 17 00:00:00 2001
From: xiongxinlei
Date: Wed, 27 Apr 2022 13:13:05 +0800
Subject: [PATCH 2/3] fix the sv ecapa-tdnn cpu training, test=doc

---
 examples/voxceleb/sv0/local/train.sh | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/examples/voxceleb/sv0/local/train.sh b/examples/voxceleb/sv0/local/train.sh
index 5477d0a3..674fedb3 100755
--- a/examples/voxceleb/sv0/local/train.sh
+++ b/examples/voxceleb/sv0/local/train.sh
@@ -42,15 +42,25 @@ device="cpu"
 if ${use_gpu}; then
     device="gpu"
 fi
+if [ $ngpu -le 0 ]; then
+    echo "no gpu, training in cpu mode"
+    device='cpu'
+    use_gpu=false
+fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train the speaker identification task with voxceleb data
     # and we will create the trained model parameters in ${exp_dir}/model.pdparams as the soft link
     # Note: we will store the log file in exp/log directory
-    python3 -m paddle.distributed.launch --gpus=$CUDA_VISIBLE_DEVICES \
-        ${BIN_DIR}/train.py --device ${device} --checkpoint-dir ${exp_dir} \
-        --data-dir ${dir} --config ${conf_path}
-
+    if $use_gpu; then
+        python3 -m paddle.distributed.launch --gpus=$CUDA_VISIBLE_DEVICES \
+            ${BIN_DIR}/train.py --device ${device} --checkpoint-dir ${exp_dir} \
+            --data-dir ${dir} --config ${conf_path}
+    else
+        python3 \
+            ${BIN_DIR}/train.py --device ${device} --checkpoint-dir ${exp_dir} \
+            --data-dir ${dir} --config ${conf_path}
+    fi
 fi
 
 if [ $? -ne 0 ]; then

From 4c56e4d42cd7cfd991f94aedc712a2ae34bf8250 Mon Sep 17 00:00:00 2001
From: xiongxinlei
Date: Wed, 27 Apr 2022 15:59:29 +0800
Subject: [PATCH 3/3] update the voxceleb readme.md, test=doc

---
 examples/voxceleb/sv0/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/voxceleb/sv0/README.md b/examples/voxceleb/sv0/README.md
index 567963e5..1069cfe7 100644
--- a/examples/voxceleb/sv0/README.md
+++ b/examples/voxceleb/sv0/README.md
@@ -142,7 +142,7 @@ using the `tar` scripts to unpack the model and then you can use the script to t
 For example:
 ```
 wget https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz
-tar xzvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz
+tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz
 source path.sh
 # If you have processed the data and get the manifest file, you can skip the following 2 steps
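
For reference, the sketch below works through the `chunk_buffer_conf` values in the new `conf/application.yaml` added by PATCH 1/3. The numeric values (16 kHz, 2-byte samples, 25 ms windows, 10 ms shifts, 7/4 frames) are copied from that file; the frame-to-chunk interpretation is an assumption made for illustration only and is not stated in the patch or verified against the PaddleSpeech sources.

```python
# Back-of-the-envelope numbers for chunk_buffer_conf in conf/application.yaml.
# Values are taken from the config; the chunk interpretation is an assumption.
sample_rate = 16000            # Hz, as in the config
sample_width = 2               # bytes per sample (16-bit PCM)
window_ms, shift_ms = 25, 10   # per-frame window length and hop, in ms
window_n, shift_n = 7, 4       # frames per decoding chunk / frames advanced per chunk


def ms_to_bytes(ms: int) -> int:
    """Bytes of raw 16 kHz / 16-bit mono audio covering `ms` milliseconds."""
    return int(sample_rate * ms / 1000) * sample_width


print(ms_to_bytes(window_ms))   # 800  bytes per 25 ms analysis frame
print(ms_to_bytes(shift_ms))    # 320  bytes per 10 ms hop

# Assuming a decoding chunk spans window_n frames that hop by shift_ms:
chunk_ms = window_ms + (window_n - 1) * shift_ms   # 25 + 6 * 10 = 85 ms
hop_ms = shift_n * shift_ms                        # 4 * 10 = 40 ms between chunks
print(chunk_ms, ms_to_bytes(chunk_ms))             # 85 2720
print(hop_ms, ms_to_bytes(hop_ms))                 # 40 1280
```

Under that reading, each decoding chunk the websocket server buffers covers roughly 85 ms of audio (2720 bytes) and advances by about 40 ms (1280 bytes) per step; treat these figures as a rough guide only.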