From 6b2dd1684503f45165e3dd1d34a605245b37aea3 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 24 Feb 2022 17:44:56 +0800 Subject: [PATCH] update server cli, test=doc --- demos/speech_server/conf/application.yaml | 16 +++++-- demos/speech_server/conf/asr/asr.yaml | 7 +-- demos/speech_server/conf/asr/asr_pd.yaml | 25 ++++++++++ demos/speech_server/conf/tts/tts.yaml | 2 +- demos/speech_server/conf/tts/tts_pd.yaml | 21 ++++----- .../server/bin/paddlespeech_server.py | 13 ++--- paddlespeech/server/conf/application.yaml | 13 +++-- paddlespeech/server/conf/asr/asr.yaml | 1 + paddlespeech/server/conf/asr/asr_pd.yaml | 2 +- paddlespeech/server/conf/tts/tts.yaml | 2 +- paddlespeech/server/conf/tts/tts_pd.yaml | 25 +++++----- .../server/engine/asr/python/asr_engine.py | 12 +---- .../engine/tts/paddleinference/tts_engine.py | 1 - paddlespeech/server/restful/tts_api.py | 47 ++++++++++++------- paddlespeech/server/utils/paddle_predictor.py | 5 +- 15 files changed, 115 insertions(+), 77 deletions(-) create mode 100644 demos/speech_server/conf/asr/asr_pd.yaml diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index c8d71f2f..fd4f5f37 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -9,9 +9,17 @@ port: 8090 ################################################################## # CONFIG FILE # ################################################################## -# add engine type (Options: asr, tts) and config file here. +# The engine_type of speech task needs to keep the same type as the config file of speech task. +# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' +# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' +# +# add engine type (Options: python, inference) +engine_type: + asr: 'inference' + tts: 'inference' +# add engine backend type (Options: asr, tts) and config file here. 
+# Adding a speech task to engine_backend means starting the service. engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' - + asr: 'conf/asr/asr_pd.yaml' + tts: 'conf/tts/tts_pd.yaml' diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml index 4c3b0a67..b1ef558d 100644 --- a/demos/speech_server/conf/asr/asr.yaml +++ b/demos/speech_server/conf/asr/asr.yaml @@ -1,7 +1,8 @@ model: 'conformer_wenetspeech' lang: 'zh' sample_rate: 16000 -cfg_path: -ckpt_path: +cfg_path: # [optional] +ckpt_path: # [optional] decode_method: 'attention_rescoring' -force_yes: False +force_yes: True +device: 'gpu:3' # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/asr/asr_pd.yaml b/demos/speech_server/conf/asr/asr_pd.yaml new file mode 100644 index 00000000..21bf7177 --- /dev/null +++ b/demos/speech_server/conf/asr/asr_pd.yaml @@ -0,0 +1,25 @@ +# This is the parameter configuration file for ASR server. +# These are the static models that support paddle inference. 
+ +################################################################## +# ACOUSTIC MODEL SETTING # +# am choices=['deepspeech2offline_aishell'] TODO +################################################################## +model_type: 'deepspeech2offline_aishell' +am_model: # the pdmodel file of am static model [optional] +am_params: # the pdiparams file of am static model [optional] +lang: 'zh' +sample_rate: 16000 +cfg_path: +decode_method: +force_yes: True + +am_predictor_conf: + device: 'gpu:3' # set 'gpu:id' or 'cpu' + enable_mkldnn: True + switch_ir_optim: True + + +################################################################## +# OTHERS # +################################################################## diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml index cb4829c8..8d45aec5 100644 --- a/demos/speech_server/conf/tts/tts.yaml +++ b/demos/speech_server/conf/tts/tts.yaml @@ -29,4 +29,4 @@ voc_stat: # OTHERS # ################################################################## lang: 'zh' -device: 'gpu:2' +device: 'gpu:3' # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml index c268c6a3..ecfa3a3b 100644 --- a/demos/speech_server/conf/tts/tts_pd.yaml +++ b/demos/speech_server/conf/tts/tts_pd.yaml @@ -6,8 +6,8 @@ # am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] ################################################################## am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of am static model -am_params: # the pdiparams file of am static model +am_model: # the pdmodel file of your am static model (XX.pdmodel) +am_params: # the pdiparams file of your am static model (XX.pdipparams) am_sample_rate: 24000 phones_dict: tones_dict: @@ -15,9 +15,9 @@ speaker_dict: spk_id: 0 am_predictor_conf: - use_gpu: True - enable_mkldnn: True - switch_ir_optim: True + device: 'gpu:3' # set 'gpu:id' or 'cpu' + enable_mkldnn: False + switch_ir_optim: False 
################################################################## @@ -25,17 +25,16 @@ am_predictor_conf: # voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] ################################################################## voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of vocoder static model -voc_params: # the pdiparams file of vocoder static model +voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) +voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams) voc_sample_rate: 24000 voc_predictor_conf: - use_gpu: True - enable_mkldnn: True - switch_ir_optim: True + device: 'gpu:3' # set 'gpu:id' or 'cpu' + enable_mkldnn: False + switch_ir_optim: False ################################################################## # OTHERS # ################################################################## lang: 'zh' -device: paddle.get_device() diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 7c88d8a0..ad62d3f6 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -20,7 +20,7 @@ from fastapi import FastAPI from ..executor import BaseExecutor from ..util import cli_server_register from ..util import stats_wrapper -from paddlespeech.server.engine.engine_factory import EngineFactory +from paddlespeech.server.engine.engine_pool import init_engine_pool from paddlespeech.server.restful.api import setup_router from paddlespeech.server.utils.config import get_config @@ -51,8 +51,10 @@ class ServerExecutor(BaseExecutor): def init(self, config) -> bool: """system initialization + Args: config (CfgNode): config object + Returns: bool: """ @@ -61,13 +63,8 @@ class ServerExecutor(BaseExecutor): api_router = setup_router(api_list) app.include_router(api_router) - # init engine - engine_pool = [] - for engine in config.engine_backend: - engine_pool.append(EngineFactory.get_engine(engine_name=engine)) - if not 
engine_pool[-1].init( - config_file=config.engine_backend[engine]): - return False + if not init_engine_pool(config): + return False return True diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 154ef9af..cc08665e 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -9,12 +9,17 @@ port: 8090 ################################################################## # CONFIG FILE # ################################################################## +# The engine_type of speech task needs to keep the same type as the config file of speech task. +# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' +# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' +# # add engine type (Options: python, inference) engine_type: - asr: 'inference' - # tts: 'inference' + asr: 'python' + tts: 'python' # add engine backend type (Options: asr, tts) and config file here. +# Adding a speech task to engine_backend means starting the service. 
engine_backend: - asr: 'conf/asr/asr_pd.yaml' - #tts: 'conf/tts/tts_pd.yaml' + asr: 'conf/asr/asr.yaml' + tts: 'conf/tts/tts.yaml' diff --git a/paddlespeech/server/conf/asr/asr.yaml b/paddlespeech/server/conf/asr/asr.yaml index 50e55a3c..b1ef558d 100644 --- a/paddlespeech/server/conf/asr/asr.yaml +++ b/paddlespeech/server/conf/asr/asr.yaml @@ -5,3 +5,4 @@ cfg_path: # [optional] ckpt_path: # [optional] decode_method: 'attention_rescoring' force_yes: True +device: 'gpu:3' # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/asr/asr_pd.yaml b/paddlespeech/server/conf/asr/asr_pd.yaml index 43a63f1b..21bf7177 100644 --- a/paddlespeech/server/conf/asr/asr_pd.yaml +++ b/paddlespeech/server/conf/asr/asr_pd.yaml @@ -15,7 +15,7 @@ decode_method: force_yes: True am_predictor_conf: - use_gpu: True + device: 'gpu:3' # set 'gpu:id' or 'cpu' enable_mkldnn: True switch_ir_optim: True diff --git a/paddlespeech/server/conf/tts/tts.yaml b/paddlespeech/server/conf/tts/tts.yaml index d0e128ea..8d45aec5 100644 --- a/paddlespeech/server/conf/tts/tts.yaml +++ b/paddlespeech/server/conf/tts/tts.yaml @@ -29,4 +29,4 @@ voc_stat: # OTHERS # ################################################################## lang: 'zh' -device: paddle.get_device() \ No newline at end of file +device: 'gpu:3' # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/tts/tts_pd.yaml b/paddlespeech/server/conf/tts/tts_pd.yaml index c268c6a3..cd4b8583 100644 --- a/paddlespeech/server/conf/tts/tts_pd.yaml +++ b/paddlespeech/server/conf/tts/tts_pd.yaml @@ -6,18 +6,18 @@ # am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] ################################################################## am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of am static model -am_params: # the pdiparams file of am static model -am_sample_rate: 24000 +am_model: # the pdmodel file of your am static model (XX.pdmodel) +am_params: # the pdiparams file of your am static model (XX.pdiparams) +am_sample_rate: 24000 # must 
match the model phones_dict: tones_dict: speaker_dict: spk_id: 0 am_predictor_conf: - use_gpu: True - enable_mkldnn: True - switch_ir_optim: True + device: 'gpu:3' # set 'gpu:id' or 'cpu' + enable_mkldnn: False + switch_ir_optim: False ################################################################## @@ -25,17 +25,16 @@ am_predictor_conf: # voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] ################################################################## voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of vocoder static model -voc_params: # the pdiparams file of vocoder static model -voc_sample_rate: 24000 +voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) +voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams) +voc_sample_rate: 24000 # must match the model voc_predictor_conf: - use_gpu: True - enable_mkldnn: True - switch_ir_optim: True + device: 'gpu:3' # set 'gpu:id' or 'cpu' + enable_mkldnn: False + switch_ir_optim: False ################################################################## # OTHERS # ################################################################## lang: 'zh' -device: paddle.get_device() diff --git a/paddlespeech/server/engine/asr/python/asr_engine.py b/paddlespeech/server/engine/asr/python/asr_engine.py index fd67b029..60040051 100644 --- a/paddlespeech/server/engine/asr/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/python/asr_engine.py @@ -12,21 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import io -import os -from typing import List -from typing import Optional -from typing import Union -import librosa import paddle -import soundfile from paddlespeech.cli.asr.infer import ASRExecutor from paddlespeech.cli.log import logger -from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer -from paddlespeech.s2t.transform.transformation import Transformation -from paddlespeech.s2t.utils.dynamic_import import dynamic_import -from paddlespeech.s2t.utils.utility import UpdateConfig from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.config import get_config @@ -63,7 +53,7 @@ class ASREngine(BaseEngine): self.executor = ASRServerExecutor() self.config = get_config(config_file) - paddle.set_device(paddle.get_device()) + paddle.set_device(self.config.device) self.executor._init_from_path( self.config.model, self.config.lang, self.config.sample_rate, self.config.cfg_path, self.config.decode_method, diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index 7679b02f..ecd2b0b6 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -344,7 +344,6 @@ class TTSEngine(BaseEngine): try: self.config = get_config(config_file) - self.executor._init_from_path( am=self.config.am, am_model=self.config.am_model, diff --git a/paddlespeech/server/restful/tts_api.py b/paddlespeech/server/restful/tts_api.py index d5fa1d42..11105147 100644 --- a/paddlespeech/server/restful/tts_api.py +++ b/paddlespeech/server/restful/tts_api.py @@ -16,7 +16,7 @@ from typing import Union from fastapi import APIRouter -from paddlespeech.server.engine.tts.paddleinference.tts_engine import TTSEngine +from paddlespeech.server.engine.engine_pool import get_engine_pool from paddlespeech.server.restful.request import TTSRequest from paddlespeech.server.restful.response import 
ErrorResponse from paddlespeech.server.restful.response import TTSResponse @@ -60,28 +60,41 @@ def tts(request_body: TTSRequest): Returns: json: [description] """ - # json to dict - item_dict = request_body.dict() - sentence = item_dict['text'] - spk_id = item_dict['spk_id'] - speed = item_dict['speed'] - volume = item_dict['volume'] - sample_rate = item_dict['sample_rate'] - save_path = item_dict['save_path'] + # get params + text = request_body.text + spk_id = request_body.spk_id + speed = request_body.speed + volume = request_body.volume + sample_rate = request_body.sample_rate + save_path = request_body.save_path # Check parameters - if speed <=0 or speed > 3 or volume <=0 or volume > 3 or \ - sample_rate not in [0, 16000, 8000] or \ - (save_path is not None and not save_path.endswith("pcm") and not save_path.endswith("wav")): - return failed_response(ErrorCode.SERVER_PARAM_ERR) - - # single - tts_engine = TTSEngine() + if speed <= 0 or speed > 3: + return failed_response( + ErrorCode.SERVER_PARAM_ERR, + "invalid speed value, the value should be between 0 and 3.") + if volume <= 0 or volume > 3: + return failed_response( + ErrorCode.SERVER_PARAM_ERR, + "invalid volume value, the value should be between 0 and 3.") + if sample_rate not in [0, 16000, 8000]: + return failed_response( + ErrorCode.SERVER_PARAM_ERR, + "invalid sample_rate value, the choice of value is 0, 8000, 16000.") + if save_path is not None and not save_path.endswith( + "pcm") and not save_path.endswith("wav"): + return failed_response( + ErrorCode.SERVER_PARAM_ERR, + "invalid save_path, saved audio formats support pcm and wav") # run try: + # get single engine from engine pool + engine_pool = get_engine_pool() + tts_engine = engine_pool['tts'] + lang, target_sample_rate, wav_base64 = tts_engine.run( - sentence, spk_id, speed, volume, sample_rate, save_path) + text, spk_id, speed, volume, sample_rate, save_path) response = { "success": True, diff --git 
a/paddlespeech/server/utils/paddle_predictor.py b/paddlespeech/server/utils/paddle_predictor.py index f910161b..f4216d74 100644 --- a/paddlespeech/server/utils/paddle_predictor.py +++ b/paddlespeech/server/utils/paddle_predictor.py @@ -41,8 +41,9 @@ def init_predictor(model_dir: Optional[os.PathLike]=None, config = Config(model_file, params_file) config.enable_memory_optim() - if predictor_conf["use_gpu"]: - config.enable_use_gpu(1000, 0) + if "gpu" in predictor_conf["device"]: + gpu_id = predictor_conf["device"].split(":")[-1] + config.enable_use_gpu(1000, int(gpu_id)) if predictor_conf["enable_mkldnn"]: config.enable_mkldnn() if predictor_conf["switch_ir_optim"]: