From 3b304544f6187b91368c66e5a5b16840f69d175c Mon Sep 17 00:00:00 2001 From: lym0302 Date: Mon, 7 Mar 2022 18:19:17 +0800 Subject: [PATCH] modify yaml, test=doc --- demos/speech_server/README.md | 17 +-- demos/speech_server/README_cn.md | 17 +-- demos/speech_server/conf/application.yaml | 120 +++++++++++++++--- demos/speech_server/conf/asr/asr.yaml | 8 -- demos/speech_server/conf/asr/asr_pd.yaml | 26 ---- demos/speech_server/conf/tts/tts.yaml | 32 ----- demos/speech_server/conf/tts/tts_pd.yaml | 42 ------ demos/speech_server/server.sh | 2 +- paddlespeech/server/bin/main.py | 2 +- .../server/bin/paddlespeech_server.py | 2 +- paddlespeech/server/conf/application.yaml | 120 +++++++++++++++--- paddlespeech/server/conf/asr/asr.yaml | 8 -- paddlespeech/server/conf/asr/asr_pd.yaml | 26 ---- paddlespeech/server/conf/tts/tts.yaml | 32 ----- paddlespeech/server/conf/tts/tts_pd.yaml | 42 ------ .../engine/asr/paddleinference/asr_engine.py | 5 +- .../server/engine/asr/python/asr_engine.py | 6 +- paddlespeech/server/engine/engine_pool.py | 10 +- .../engine/tts/paddleinference/tts_engine.py | 5 +- .../server/engine/tts/python/tts_engine.py | 5 +- tests/unit/server/change_yaml.py | 109 ++++++++-------- tests/unit/server/conf/application.yaml | 120 +++++++++++++++--- tests/unit/server/conf/asr/asr.yaml | 8 -- tests/unit/server/conf/asr/asr_pd.yaml | 26 ---- tests/unit/server/conf/tts/tts.yaml | 32 ----- tests/unit/server/conf/tts/tts_pd.yaml | 42 ------ tests/unit/server/test_server_client.sh | 13 +- 27 files changed, 385 insertions(+), 492 deletions(-) delete mode 100644 demos/speech_server/conf/asr/asr.yaml delete mode 100644 demos/speech_server/conf/asr/asr_pd.yaml delete mode 100644 demos/speech_server/conf/tts/tts.yaml delete mode 100644 demos/speech_server/conf/tts/tts_pd.yaml delete mode 100644 paddlespeech/server/conf/asr/asr.yaml delete mode 100644 paddlespeech/server/conf/asr/asr_pd.yaml delete mode 100644 paddlespeech/server/conf/tts/tts.yaml delete mode 100644 
paddlespeech/server/conf/tts/tts_pd.yaml delete mode 100644 tests/unit/server/conf/asr/asr.yaml delete mode 100644 tests/unit/server/conf/asr/asr_pd.yaml delete mode 100644 tests/unit/server/conf/tts/tts.yaml delete mode 100644 tests/unit/server/conf/tts/tts_pd.yaml diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 515abaf6..a2f6f221 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -11,21 +11,14 @@ This demo is an implementation of starting the voice service and accessing the s see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). It is recommended to use **paddlepaddle 2.2.1** or above. -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from medium and hard to install paddlespeech. ### 2. Prepare config File -The configuration file contains the service-related configuration files and the model configuration related to the voice tasks contained in the service. They are all under the `conf` folder. +The configuration file can be found in `conf/application.yaml`. +Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`. +At present, the speech tasks integrated by the service include: asr (speech recognition) and tts (speech synthesis). +Currently the engine type supports two forms: python and inference (Paddle Inference). -**Note: The configuration of `engine_backend` in `application.yaml` represents all speech tasks included in the started service.** -If the service you want to start contains only a certain speech task, then you need to comment out the speech tasks that do not need to be included. 
For example, if you only want to use the speech recognition (ASR) service, then you can comment out the speech synthesis (TTS) service, as in the following example: -```bash -engine_backend: - asr: 'conf/asr/asr.yaml' - #tts: 'conf/tts/tts.yaml' -``` - -**Note: The configuration file of `engine_backend` in `application.yaml` needs to match the configuration type of `engine_type`.** -When the configuration file of `engine_backend` is `XXX.yaml`, the configuration type of `engine_type` needs to be set to `python`; when the configuration file of `engine_backend` is `XXX_pd.yaml`, the configuration of `engine_type` needs to be set type is `inference`; The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index da05b686..762248a1 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -11,20 +11,15 @@ 请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). 推荐使用 **paddlepaddle 2.2.1** 或以上版本。 -你可以从 easy,medium,hard 三中方式中选择一种方式安装 PaddleSpeech。 +你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。 ### 2. 
准备配置文件 -配置文件包含服务相关的配置文件和服务中包含的语音任务相关的模型配置。 它们都在 `conf` 文件夹下。 -**注意:`application.yaml` 中 `engine_backend` 的配置表示启动的服务中包含的所有语音任务。** -如果你想启动的服务中只包含某项语音任务,那么你需要注释掉不需要包含的语音任务。例如你只想使用语音识别(ASR)服务,那么你可以将语音合成(TTS)服务注释掉,如下示例: -```bash -engine_backend: - asr: 'conf/asr/asr.yaml' - #tts: 'conf/tts/tts.yaml' -``` -**注意:`application.yaml` 中 `engine_backend` 的配置文件需要和 `engine_type` 的配置类型匹配。** -当`engine_backend` 的配置文件为`XXX.yaml`时,需要设置`engine_type`的配置类型为`python`;当`engine_backend` 的配置文件为`XXX_pd.yaml`时,需要设置`engine_type`的配置类型为`inference`; +配置文件可参见 `conf/application.yaml` 。 +其中,`engine_list`表示即将启动的服务将会包含的语音引擎,格式为 <语音任务>_<引擎类型>。 +目前服务集成的语音任务有: asr(语音识别)、tts(语音合成)。 +目前引擎类型支持两种形式:python 及 inference (Paddle Inference) + 这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index aba33a51..6048450b 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. -################################################################## -# SERVER SETTING # -################################################################## +################################################################################# +# SERVER SETTING # +################################################################################# host: 127.0.0.1 port: 8090 -################################################################## -# CONFIG FILE # -################################################################## -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. -engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - -# The engine_type of speech task needs to keep the same type as the config file of speech task. 
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' -# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' -# -# add engine type (Options: python, inference) -engine_type: - asr: 'python' - tts: 'python' +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] +engine_list: ['asr_python', 'tts_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# +################### speech task: asr; engine_type: python ####################### +asr_python: + model: 'conformer_wenetspeech' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + decode_method: 'attention_rescoring' + force_yes: True + device: # set 'gpu:id' or 'cpu' + + +################### speech task: asr; engine_type: inference ####################### +asr_inference: + # model_type choices=['deepspeech2offline_aishell'] + model_type: 'deepspeech2offline_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################### speech task: tts; engine_type: python ####################### +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk'] + am: 'fastspeech2_csmsc' + am_config: + am_ckpt: + am_stat: + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 
'pwgan_aishell3', + # 'pwgan_vctk', 
'mb_melgan_csmsc'] + voc: 'pwgan_csmsc' + voc_config: + voc_ckpt: + voc_stat: + + # others + lang: 'zh' + device: # set 'gpu:id' or 'cpu' + + +################### speech task: tts; engine_type: inference ####################### +tts_inference: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] + am: 'fastspeech2_csmsc' + am_model: # the pdmodel file of your am static model (XX.pdmodel) + am_params: # the pdiparams file of your am static model (XX.pdipparams) + am_sample_rate: 24000 + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] + voc: 'pwgan_csmsc' + voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) + voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) + voc_sample_rate: 24000 + + voc_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # others + lang: 'zh' diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml deleted file mode 100644 index a6743b77..00000000 --- a/demos/speech_server/conf/asr/asr.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: 'conformer_wenetspeech' -lang: 'zh' -sample_rate: 16000 -cfg_path: # [optional] -ckpt_path: # [optional] -decode_method: 'attention_rescoring' -force_yes: True -device: # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/asr/asr_pd.yaml b/demos/speech_server/conf/asr/asr_pd.yaml deleted file mode 100644 index 4c415ac7..00000000 --- a/demos/speech_server/conf/asr/asr_pd.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# This is the parameter configuration file for ASR server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['deepspeech2offline_aishell'] TODO -################################################################## -model_type: 'deepspeech2offline_aishell' -am_model: # the pdmodel file of am static model [optional] -am_params: # the pdiparams file of am static model [optional] -lang: 'zh' -sample_rate: 16000 -cfg_path: -decode_method: -force_yes: True - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# OTHERS # -################################################################## diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml deleted file mode 100644 index 19207f0b..00000000 --- a/demos/speech_server/conf/tts/tts.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# This is the parameter configuration file for TTS server. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', -# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', -# 'fastspeech2_vctk'] -################################################################## -am: 'fastspeech2_csmsc' -am_config: -am_ckpt: -am_stat: -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', -# 'pwgan_vctk', 'mb_melgan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_config: -voc_ckpt: -voc_stat: - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' -device: # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml deleted file mode 100644 index e27b9665..00000000 --- a/demos/speech_server/conf/tts/tts_pd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This is the parameter configuration file for TTS server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/demos/speech_server/server.sh b/demos/speech_server/server.sh index d9367ec0..e5961286 100644 --- a/demos/speech_server/server.sh +++ b/demos/speech_server/server.sh @@ -1,3 +1,3 @@ #!/bin/bash -paddlespeech_server start --config_file ./conf/application.yaml \ No newline at end of file +paddlespeech_server start --config_file ./conf/application.yaml diff --git a/paddlespeech/server/bin/main.py b/paddlespeech/server/bin/main.py index 360d295e..de528299 100644 --- a/paddlespeech/server/bin/main.py +++ b/paddlespeech/server/bin/main.py @@ -34,7 +34,7 @@ def init(config): bool: 
""" # init api - api_list = list(config.engine_backend) + api_list = list(engine.split("_")[0] for engine in config.engine_list) api_router = setup_router(api_list) app.include_router(api_router) diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 21fc5c65..3d71f091 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -62,7 +62,7 @@ class ServerExecutor(BaseExecutor): bool: """ # init api - api_list = list(config.engine_backend) + api_list = list(engine.split("_")[0] for engine in config.engine_list) api_router = setup_router(api_list) app.include_router(api_router) diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index aba33a51..6048450b 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. -################################################################## -# SERVER SETTING # -################################################################## +################################################################################# +# SERVER SETTING # +################################################################################# host: 127.0.0.1 port: 8090 -################################################################## -# CONFIG FILE # -################################################################## -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. -engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - -# The engine_type of speech task needs to keep the same type as the config file of speech task. 
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' -# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' -# -# add engine type (Options: python, inference) -engine_type: - asr: 'python' - tts: 'python' +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] +engine_list: ['asr_python', 'tts_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# +################### speech task: asr; engine_type: python ####################### +asr_python: + model: 'conformer_wenetspeech' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + decode_method: 'attention_rescoring' + force_yes: True + device: # set 'gpu:id' or 'cpu' + + +################### speech task: asr; engine_type: inference ####################### +asr_inference: + # model_type choices=['deepspeech2offline_aishell'] + model_type: 'deepspeech2offline_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################### speech task: tts; engine_type: python ####################### +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk'] + am: 'fastspeech2_csmsc' + am_config: + am_ckpt: + am_stat: + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 
'pwgan_aishell3', + # 'pwgan_vctk', 
'mb_melgan_csmsc'] + voc: 'pwgan_csmsc' + voc_config: + voc_ckpt: + voc_stat: + + # others + lang: 'zh' + device: # set 'gpu:id' or 'cpu' + + +################### speech task: tts; engine_type: inference ####################### +tts_inference: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] + am: 'fastspeech2_csmsc' + am_model: # the pdmodel file of your am static model (XX.pdmodel) + am_params: # the pdiparams file of your am static model (XX.pdipparams) + am_sample_rate: 24000 + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] + voc: 'pwgan_csmsc' + voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) + voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) + voc_sample_rate: 24000 + + voc_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # others + lang: 'zh' diff --git a/paddlespeech/server/conf/asr/asr.yaml b/paddlespeech/server/conf/asr/asr.yaml deleted file mode 100644 index a6743b77..00000000 --- a/paddlespeech/server/conf/asr/asr.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: 'conformer_wenetspeech' -lang: 'zh' -sample_rate: 16000 -cfg_path: # [optional] -ckpt_path: # [optional] -decode_method: 'attention_rescoring' -force_yes: True -device: # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/asr/asr_pd.yaml b/paddlespeech/server/conf/asr/asr_pd.yaml deleted file mode 100644 index 4c415ac7..00000000 --- a/paddlespeech/server/conf/asr/asr_pd.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# This is the parameter configuration file for ASR server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['deepspeech2offline_aishell'] TODO -################################################################## -model_type: 'deepspeech2offline_aishell' -am_model: # the pdmodel file of am static model [optional] -am_params: # the pdiparams file of am static model [optional] -lang: 'zh' -sample_rate: 16000 -cfg_path: -decode_method: -force_yes: True - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# OTHERS # -################################################################## diff --git a/paddlespeech/server/conf/tts/tts.yaml b/paddlespeech/server/conf/tts/tts.yaml deleted file mode 100644 index 19207f0b..00000000 --- a/paddlespeech/server/conf/tts/tts.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# This is the parameter configuration file for TTS server. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', -# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', -# 'fastspeech2_vctk'] -################################################################## -am: 'fastspeech2_csmsc' -am_config: -am_ckpt: -am_stat: -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', -# 'pwgan_vctk', 'mb_melgan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_config: -voc_ckpt: -voc_stat: - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' -device: # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/tts/tts_pd.yaml b/paddlespeech/server/conf/tts/tts_pd.yaml deleted file mode 100644 index e27b9665..00000000 --- a/paddlespeech/server/conf/tts/tts_pd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This is the parameter configuration file for TTS server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py index cb973e92..1925bf1d 100644 --- a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py +++ b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py @@ -26,7 +26,6 @@ from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.modules.ctc import CTCDecoder from paddlespeech.s2t.utils.utility import UpdateConfig from paddlespeech.server.engine.base_engine import BaseEngine -from 
paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.paddle_predictor import init_predictor from paddlespeech.server.utils.paddle_predictor import run_model @@ -184,7 +183,7 @@ class ASREngine(BaseEngine): def __init__(self): super(ASREngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: """init engine resource Args: @@ -196,7 +195,7 @@ class ASREngine(BaseEngine): self.input = None self.output = None self.executor = ASRServerExecutor() - self.config = get_config(config_file) + self.config = config self.executor._init_from_path( model_type=self.config.model_type, diff --git a/paddlespeech/server/engine/asr/python/asr_engine.py b/paddlespeech/server/engine/asr/python/asr_engine.py index 1e2c5cc2..e76c49a7 100644 --- a/paddlespeech/server/engine/asr/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/python/asr_engine.py @@ -19,7 +19,6 @@ import paddle from paddlespeech.cli.asr.infer import ASRExecutor from paddlespeech.cli.log import logger from paddlespeech.server.engine.base_engine import BaseEngine -from paddlespeech.server.utils.config import get_config __all__ = ['ASREngine'] @@ -40,7 +39,7 @@ class ASREngine(BaseEngine): def __init__(self): super(ASREngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: """init engine resource Args: @@ -52,8 +51,7 @@ class ASREngine(BaseEngine): self.input = None self.output = None self.executor = ASRServerExecutor() - - self.config = get_config(config_file) + self.config = config try: if self.config.device: self.device = self.config.device diff --git a/paddlespeech/server/engine/engine_pool.py b/paddlespeech/server/engine/engine_pool.py index f6a4d2aa..9de73567 100644 --- a/paddlespeech/server/engine/engine_pool.py +++ b/paddlespeech/server/engine/engine_pool.py @@ -28,11 +28,13 @@ def init_engine_pool(config) -> bool: """ Init engine pool """ global ENGINE_POOL - for engine in 
config.engine_backend: + + for engine_and_type in config.engine_list: + engine = engine_and_type.split("_")[0] + engine_type = engine_and_type.split("_")[1] ENGINE_POOL[engine] = EngineFactory.get_engine( - engine_name=engine, engine_type=config.engine_type[engine]) - if not ENGINE_POOL[engine].init( - config_file=config.engine_backend[engine]): + engine_name=engine, engine_type=engine_type) + if not ENGINE_POOL[engine].init(config=config[engine_and_type]): return False return True diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index 5955c1a2..1bbbe0ea 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -29,7 +29,6 @@ from paddlespeech.cli.utils import download_and_decompress from paddlespeech.cli.utils import MODEL_HOME from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.audio_process import change_speed -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.errors import ErrorCode from paddlespeech.server.utils.exception import ServerBaseException from paddlespeech.server.utils.paddle_predictor import init_predictor @@ -357,11 +356,11 @@ class TTSEngine(BaseEngine): """ super(TTSEngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() try: - self.config = get_config(config_file) + self.config = config self.executor._init_from_path( am=self.config.am, am_model=self.config.am_model, diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py b/paddlespeech/server/engine/tts/python/tts_engine.py index 7dd57669..8d6c7fd1 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -25,7 +25,6 @@ from paddlespeech.cli.log import logger from 
paddlespeech.cli.tts.infer import TTSExecutor from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.audio_process import change_speed -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.errors import ErrorCode from paddlespeech.server.utils.exception import ServerBaseException @@ -50,11 +49,11 @@ class TTSEngine(BaseEngine): """ super(TTSEngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() try: - self.config = get_config(config_file) + self.config = config if self.config.device: self.device = self.config.device else: diff --git a/tests/unit/server/change_yaml.py b/tests/unit/server/change_yaml.py index 5a5d9ae0..1f063d8f 100644 --- a/tests/unit/server/change_yaml.py +++ b/tests/unit/server/change_yaml.py @@ -5,7 +5,7 @@ import os import yaml -def change_speech_yaml(yaml_name: str, device: str): +def change_device(yamlfile: str, engine: str, device: str): """Change the settings of the device under the voice task configuration file Args: @@ -13,68 +13,54 @@ def change_speech_yaml(yaml_name: str, device: str): cpu (bool): True means set device to "cpu" model_type (dict): change model type """ - if "asr" in yaml_name: - dirpath = "./conf/asr/" - elif 'tts' in yaml_name: - dirpath = "./conf/tts/" - yamlfile = dirpath + yaml_name + ".yaml" - tmp_yamlfile = dirpath + yaml_name + "_tmp.yaml" + tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml" os.system("cp %s %s" % (yamlfile, tmp_yamlfile)) + if device == 'cpu': + set_device = 'cpu' + elif device == 'gpu': + set_device = 'gpu:0' + else: + print("Please set correct device: cpu or gpu.") + with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: y = yaml.safe_load(f) - if device == 'cpu': - print("Set device: cpu") - if yaml_name == 'asr': - y['device'] = 'cpu' - elif yaml_name == 'asr_pd': - y['am_predictor_conf']['device'] = 'cpu' - 
elif yaml_name == 'tts': - y['device'] = 'cpu' - elif yaml_name == 'tts_pd': - y['am_predictor_conf']['device'] = 'cpu' - y['voc_predictor_conf']['device'] = 'cpu' - elif device == 'gpu': - print("Set device: gpu") - if yaml_name == 'asr': - y['device'] = 'gpu:0' - elif yaml_name == 'asr_pd': - y['am_predictor_conf']['device'] = 'gpu:0' - elif yaml_name == 'tts': - y['device'] = 'gpu:0' - elif yaml_name == 'tts_pd': - y['am_predictor_conf']['device'] = 'gpu:0' - y['voc_predictor_conf']['device'] = 'gpu:0' + if engine == 'asr_python' or engine == 'tts_python': + y[engine]['device'] = set_device + elif engine == 'asr_inference': + y[engine]['am_predictor_conf']['device'] = set_device + elif engine == 'tts_inference': + y[engine]['am_predictor_conf']['device'] = set_device + y[engine]['voc_predictor_conf']['device'] = set_device else: - print("Please set correct device: cpu or gpu.") + print( + "Please set correct engine: asr_python, tts_python, asr_inference, tts_inference." + ) - print("The content of '%s': " % (yamlfile)) print(yaml.dump(y, default_flow_style=False, sort_keys=False)) yaml.dump(y, fw, allow_unicode=True) os.system("rm %s" % (tmp_yamlfile)) print("Change %s successfully." 
% (yamlfile)) -def change_app_yaml(task: str, engine_type: str): +def change_engine_type(yamlfile: str, engine_type): """Change the engine type and corresponding configuration file of the speech task in application.yaml Args: task (str): asr or tts """ - yamlfile = "./conf/application.yaml" - tmp_yamlfile = "./conf/application_tmp.yaml" + tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml" os.system("cp %s %s" % (yamlfile, tmp_yamlfile)) + speech_task = engine_type.split("_")[0] + with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: y = yaml.safe_load(f) - y['engine_type'][task] = engine_type - path_list = ["./conf/", task, "/", task] - if engine_type == 'python': - path_list.append(".yaml") - - elif engine_type == 'inference': - path_list.append("_pd.yaml") - y['engine_backend'][task] = ''.join(path_list) - print("The content of './conf/application.yaml': ") + engine_list = y['engine_list'] + for engine in engine_list: + if speech_task in engine: + engine_list.remove(engine) + engine_list.append(engine_type) + y['engine_list'] = engine_list print(yaml.dump(y, default_flow_style=False, sort_keys=False)) yaml.dump(y, fw, allow_unicode=True) os.system("rm %s" % (tmp_yamlfile)) @@ -83,32 +69,37 @@ def change_app_yaml(task: str, engine_type: str): if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument( + '--config_file', + type=str, + default='./conf/application.yaml', + help='server yaml file.') parser.add_argument( '--change_task', type=str, default=None, help='Change task', choices=[ - 'app-asr-python', - 'app-asr-inference', - 'app-tts-python', - 'app-tts-inference', - 'speech-asr-cpu', - 'speech-asr-gpu', - 'speech-asr_pd-cpu', - 'speech-asr_pd-gpu', - 'speech-tts-cpu', - 'speech-tts-gpu', - 'speech-tts_pd-cpu', - 'speech-tts_pd-gpu', + 'enginetype-asr_python', + 'enginetype-asr_inference', + 'enginetype-tts_python', + 'enginetype-tts_inference', + 'device-asr_python-cpu', + 'device-asr_python-gpu', + 
'device-asr_inference-cpu', + 'device-asr_inference-gpu', + 'device-tts_python-cpu', + 'device-tts_python-gpu', + 'device-tts_inference-cpu', + 'device-tts_inference-gpu', ], required=True) args = parser.parse_args() types = args.change_task.split("-") - if types[0] == "app": - change_app_yaml(types[1], types[2]) - elif types[0] == "speech": - change_speech_yaml(types[1], types[2]) + if types[0] == "enginetype": + change_engine_type(args.config_file, types[1]) + elif types[0] == "device": + change_device(args.config_file, types[1], types[2]) else: print("Error change task, please check change_task.") diff --git a/tests/unit/server/conf/application.yaml b/tests/unit/server/conf/application.yaml index aba33a51..6048450b 100644 --- a/tests/unit/server/conf/application.yaml +++ b/tests/unit/server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. -################################################################## -# SERVER SETTING # -################################################################## +################################################################################# +# SERVER SETTING # +################################################################################# host: 127.0.0.1 port: 8090 -################################################################## -# CONFIG FILE # -################################################################## -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. -engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - -# The engine_type of speech task needs to keep the same type as the config file of speech task. 
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' -# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' -# -# add engine type (Options: python, inference) -engine_type: - asr: 'python' - tts: 'python' +# The task format in the engine_list is: <speech task>_<engine type> +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference'] +engine_list: ['asr_python', 'tts_python'] + + +################################################################################# +# ENGINE CONFIG # +################################################################################# +################### speech task: asr; engine_type: python ####################### +asr_python: + model: 'conformer_wenetspeech' + lang: 'zh' + sample_rate: 16000 + cfg_path: # [optional] + ckpt_path: # [optional] + decode_method: 'attention_rescoring' + force_yes: True + device: # set 'gpu:id' or 'cpu' + + +################### speech task: asr; engine_type: inference ####################### +asr_inference: + # model_type choices=['deepspeech2offline_aishell'] + model_type: 'deepspeech2offline_aishell' + am_model: # the pdmodel file of am static model [optional] + am_params: # the pdiparams file of am static model [optional] + lang: 'zh' + sample_rate: 16000 + cfg_path: + decode_method: + force_yes: True + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################### speech task: tts; engine_type: python ####################### +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk'] + am: 'fastspeech2_csmsc' + am_config: + am_ckpt: + am_stat: + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', + # 'pwgan_vctk',
'mb_melgan_csmsc'] + voc: 'pwgan_csmsc' + voc_config: + voc_ckpt: + voc_stat: + + # others + lang: 'zh' + device: # set 'gpu:id' or 'cpu' + + +################### speech task: tts; engine_type: inference ####################### +tts_inference: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] + am: 'fastspeech2_csmsc' + am_model: # the pdmodel file of your am static model (XX.pdmodel) + am_params: # the pdiparams file of your am static model (XX.pdipparams) + am_sample_rate: 24000 + phones_dict: + tones_dict: + speaker_dict: + spk_id: 0 + + am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] + voc: 'pwgan_csmsc' + voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) + voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) + voc_sample_rate: 24000 + + voc_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + # others + lang: 'zh' diff --git a/tests/unit/server/conf/asr/asr.yaml b/tests/unit/server/conf/asr/asr.yaml deleted file mode 100644 index a6743b77..00000000 --- a/tests/unit/server/conf/asr/asr.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: 'conformer_wenetspeech' -lang: 'zh' -sample_rate: 16000 -cfg_path: # [optional] -ckpt_path: # [optional] -decode_method: 'attention_rescoring' -force_yes: True -device: # set 'gpu:id' or 'cpu' diff --git a/tests/unit/server/conf/asr/asr_pd.yaml b/tests/unit/server/conf/asr/asr_pd.yaml deleted file mode 100644 index 4c415ac7..00000000 --- a/tests/unit/server/conf/asr/asr_pd.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# This is the parameter configuration file for ASR server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['deepspeech2offline_aishell'] TODO -################################################################## -model_type: 'deepspeech2offline_aishell' -am_model: # the pdmodel file of am static model [optional] -am_params: # the pdiparams file of am static model [optional] -lang: 'zh' -sample_rate: 16000 -cfg_path: -decode_method: -force_yes: True - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# OTHERS # -################################################################## diff --git a/tests/unit/server/conf/tts/tts.yaml b/tests/unit/server/conf/tts/tts.yaml deleted file mode 100644 index 19207f0b..00000000 --- a/tests/unit/server/conf/tts/tts.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# This is the parameter configuration file for TTS server. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', -# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', -# 'fastspeech2_vctk'] -################################################################## -am: 'fastspeech2_csmsc' -am_config: -am_ckpt: -am_stat: -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', -# 'pwgan_vctk', 'mb_melgan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_config: -voc_ckpt: -voc_stat: - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' -device: # set 'gpu:id' or 'cpu' diff --git a/tests/unit/server/conf/tts/tts_pd.yaml b/tests/unit/server/conf/tts/tts_pd.yaml deleted file mode 100644 index e27b9665..00000000 --- a/tests/unit/server/conf/tts/tts_pd.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This is the parameter configuration file for TTS server. -# These are the static models that support paddle inference. 
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/tests/unit/server/test_server_client.sh b/tests/unit/server/test_server_client.sh index 795a23e0..b48e7111 100644 --- a/tests/unit/server/test_server_client.sh +++ b/tests/unit/server/test_server_client.sh @@ -99,8 +99,8 @@ echo "************************************************************************** # start server: asr engine type: python; tts engine type: python; device: cpu -python change_yaml.py --change_task speech-asr-cpu # change asr.yaml device: cpu -python change_yaml.py --change_task speech-tts-cpu # change tts.yaml device: cpu +python change_yaml.py --change_task 
device-asr_python-cpu # change application.yaml, asr_python device: cpu +python change_yaml.py --change_task device-tts_python-cpu # change application.yaml, tts_python device: cpu echo "Start the service: asr engine type: python; tts engine type: python; device: cpu" | tee -a ./log/test_result.log ((target_start_num+=1)) @@ -125,8 +125,8 @@ echo "************************************************************************** # start server: asr engine type: inference; tts engine type: inference; device: gpu -python change_yaml.py --change_task app-asr-inference # change application.yaml, asr engine_type: inference; asr engine_backend: asr_pd.yaml -python change_yaml.py --change_task app-tts-inference # change application.yaml, tts engine_type: inference; tts engine_backend: tts_pd.yaml +python change_yaml.py --change_task enginetype-asr_inference # change application.yaml, engine_list: replace the asr engine with asr_inference +python change_yaml.py --change_task enginetype-tts_inference # change application.yaml, engine_list: replace the tts engine with tts_inference echo "Start the service: asr engine type: inference; tts engine type: inference; device: gpu" | tee -a ./log/test_result.log ((target_start_num+=1)) @@ -151,8 +151,8 @@ echo "************************************************************************** # start server: asr engine type: inference; tts engine type: inference; device: cpu -python change_yaml.py --change_task speech-asr_pd-cpu # change asr_pd.yaml device: cpu -python change_yaml.py --change_task speech-tts_pd-cpu # change tts_pd.yaml device: cpu +python change_yaml.py --change_task device-asr_inference-cpu # change application.yaml, asr_inference device: cpu +python change_yaml.py --change_task device-tts_inference-cpu # change application.yaml, tts_inference device: cpu echo "start the service: asr engine type: inference; tts engine type: inference; device: cpu" | tee -a ./log/test_result.log ((target_start_num+=1)) @@ -182,4 +182,5 @@ echo "***************** Here are all the test results ********************" cat
./log/test_result.log # Restoring conf is the same as demos/speech_server +rm -rf ./conf cp ../../../demos/speech_server/conf/ ./ -rf \ No newline at end of file