From 06c9eee3399b4bd492c9c12b8c66ebda50373f03 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Wed, 8 Jun 2022 05:18:15 +0000 Subject: [PATCH] update readme, add conf file, update test_cli --- demos/streaming_asr_server/README.md | 2 + demos/streaming_asr_server/README_cn.md | 4 +- ...former_wenetspeech_application_faster.yaml | 48 +++++++++++++++++++ paddlespeech/cli/asr/infer.py | 2 +- ...former_wenetspeech_application_faster.yaml | 48 +++++++++++++++++++ tests/unit/cli/test_cli.sh | 3 ++ 6 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application_faster.yaml create mode 100644 paddlespeech/server/conf/ws_conformer_wenetspeech_application_faster.yaml diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md index 4824da62..a770f58c 100644 --- a/demos/streaming_asr_server/README.md +++ b/demos/streaming_asr_server/README.md @@ -33,6 +33,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav ```bash # in PaddleSpeech/demos/streaming_asr_server start the service paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application.yaml + # if you want to increase decoding speed, you can use the config file below, it will increase decoding speed and reduce accuracy + paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application_faster.yaml ``` Usage: diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md index 4ed15e17..c771869e 100644 --- a/demos/streaming_asr_server/README_cn.md +++ b/demos/streaming_asr_server/README_cn.md @@ -21,7 +21,7 @@ 下载好 `PaddleSpeech` 之后,进入到 `PaddleSpeech/demos/streaming_asr_server` 目录。 配置文件可参见该目录下 `conf/ws_application.yaml` 和 `conf/ws_conformer_wenetspeech_application.yaml` 。 -目前服务集成的模型有: DeepSpeech2和 conformer模型,对应的配置文件如下: +目前服务集成的模型有: DeepSpeech2 和 conformer模型,对应的配置文件如下: * DeepSpeech: `conf/ws_application.yaml` * conformer: 
`conf/ws_conformer_wenetspeech_application.yaml` @@ -40,6 +40,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav ```bash # 在 PaddleSpeech/demos/streaming_asr_server 目录启动服务 paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application.yaml + # 你如果愿意为了增加解码的速度而牺牲一定的模型精度,你可以使用如下的脚本 + paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application_faster.yaml ``` 使用方法: diff --git a/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application_faster.yaml b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application_faster.yaml new file mode 100644 index 00000000..ba413c80 --- /dev/null +++ b/demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application_faster.yaml @@ -0,0 +1,48 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engin_list is: _ +# task choices = ['asr_online'] +# protocol = ['websocket'] (only one can be selected). +# websocket only support online engine type. 
+protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_wenetspeech' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: 'cpu' # cpu or gpu:id + decode_method: "attention_rescoring" + continuous_decoding: True # enable continue decoding when endpoint detected + num_decoding_left_chunks: 16 + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index ad83bc20..76fd34e8 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -88,7 +88,7 @@ class ASRExecutor(BaseExecutor): '-num_left', type=str, default=-1, - help='only support transformer and conformer model') + help='only support transformer and conformer online model') self.parser.add_argument( '--ckpt_path', type=str, diff --git a/paddlespeech/server/conf/ws_conformer_wenetspeech_application_faster.yaml b/paddlespeech/server/conf/ws_conformer_wenetspeech_application_faster.yaml new file mode 100644 index 00000000..ba413c80 --- /dev/null +++ b/paddlespeech/server/conf/ws_conformer_wenetspeech_application_faster.yaml @@ -0,0 +1,48 @@ +# This is the parameter configuration file for PaddleSpeech Serving. 
+ +################################################################################# +# SERVER SETTING # +################################################################################# +host: 0.0.0.0 +port: 8090 + +# The task format in the engin_list is: _ +# task choices = ['asr_online'] +# protocol = ['websocket'] (only one can be selected). +# websocket only support online engine type. +protocol: 'websocket' +engine_list: ['asr_online'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# + +################################### ASR ######################################### +################### speech task: asr; engine_type: online ####################### +asr_online: + model_type: 'conformer_online_wenetspeech' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + device: 'cpu' # cpu or gpu:id + decode_method: "attention_rescoring" + continuous_decoding: True # enable continue decoding when endpoint detected + num_decoding_left_chunks: 16 + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + chunk_buffer_conf: + window_n: 7 # frame + shift_n: 4 # frame + window_ms: 25 # ms + shift_ms: 10 # ms + sample_rate: 16000 + sample_width: 2 diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index e0ebd141..6879c4d6 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -22,6 +22,9 @@ paddlespeech asr --model deepspeech2online_wenetspeech --input ./zh.wav paddlespeech asr --model deepspeech2online_aishell --input ./zh.wav paddlespeech asr --model deepspeech2offline_librispeech --lang en --input ./en.wav +# Support editing 
num_decoding_left_chunks +paddlespeech asr --model conformer_online_wenetspeech --num_decoding_left_chunks 3 --input ./zh.wav + # long audio restriction { wget -c https://paddlespeech.bj.bcebos.com/datasets/single_wav/zh/test_long_audio_01.wav