update server cli, test=doc

4 years ago · 6b2dd16845
parent fe350ddddf
commit 6b2dd16845
15 changed files with 115 additions and 77 deletions
--- a/demos/speech_server/conf/application.yaml
+++ b/demos/speech_server/conf/application.yaml
@ -9,9 +9,17 @@ port: 8090
 ##################################################################
 #                     CONFIG FILE                                #
 ##################################################################
-# add engine type (Options: asr, tts) and config file here.
+# The engine_type of each speech task must match the type of that task's config file.
 # E.g.: if the engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml'
 # E.g.: if the engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml'
 #
 # add engine type (Options: python, inference) 
 engine_type:
    asr: 'inference'
    tts: 'inference'
 # add engine backend type (Options: asr, tts) and config file here.
 # Adding a speech task to engine_backend means starting the service.
 engine_backend:
-    asr: 'conf/asr/asr.yaml'
+    asr: 'conf/asr/asr_pd.yaml'
-    tts: 'conf/tts/tts.yaml'
+    tts: 'conf/tts/tts_pd.yaml'
--- a/demos/speech_server/conf/asr/asr.yaml
+++ b/demos/speech_server/conf/asr/asr.yaml
@ -1,7 +1,8 @@
 model: 'conformer_wenetspeech'
 lang: 'zh'
 sample_rate: 16000
-cfg_path: 
+cfg_path: # [optional]
-ckpt_path: 
+ckpt_path: # [optional]
 decode_method: 'attention_rescoring'
-force_yes: False
+force_yes: True
 device: 'gpu:3'  # set 'gpu:id' or 'cpu'
--- a/demos/speech_server/conf/asr/asr_pd.yaml
+++ b/demos/speech_server/conf/asr/asr_pd.yaml
@ -0,0 +1,25 @@
 # This is the parameter configuration file for ASR server.
 # These are the static models that support paddle inference.
 ##################################################################
 #                  ACOUSTIC MODEL SETTING                        #
 # am choices=['deepspeech2offline_aishell'] TODO
 ##################################################################
 model_type: 'deepspeech2offline_aishell'
 am_model: # the pdmodel file of am static model [optional]
 am_params:  # the pdiparams file of am static model [optional]
 lang: 'zh'
 sample_rate: 16000
 cfg_path: 
 decode_method: 
 force_yes: True
 am_predictor_conf:
  device: 'gpu:3'  # set 'gpu:id' or 'cpu'
  enable_mkldnn: True
  switch_ir_optim: True
 ##################################################################
 #                            OTHERS                              #
 ##################################################################
--- a/demos/speech_server/conf/tts/tts.yaml
+++ b/demos/speech_server/conf/tts/tts.yaml
@ -29,4 +29,4 @@ voc_stat:
 #                            OTHERS                              #
 ##################################################################
 lang: 'zh'
-device: 'gpu:2'
+device: 'gpu:3'  # set 'gpu:id' or 'cpu'
--- a/demos/speech_server/conf/tts/tts_pd.yaml
+++ b/demos/speech_server/conf/tts/tts_pd.yaml
@ -6,8 +6,8 @@
 # am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
 ##################################################################
 am: 'fastspeech2_csmsc'   
-am_model: # the pdmodel file of am static model
+am_model: # the pdmodel file of your am static model (XX.pdmodel)
-am_params: # the pdiparams file of am static model
+am_params: # the pdiparams file of your am static model (XX.pdiparams)
 am_sample_rate: 24000
 phones_dict: 
 tones_dict: 
@ -15,9 +15,9 @@ speaker_dict:
 spk_id: 0
 am_predictor_conf:
-  use_gpu: True
+  device: 'gpu:3'  # set 'gpu:id' or 'cpu'
-  enable_mkldnn: True
+  enable_mkldnn: False
-  switch_ir_optim: True
+  switch_ir_optim: False
 ##################################################################
@ -25,17 +25,16 @@ am_predictor_conf:
 # voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
 ##################################################################
 voc: 'pwgan_csmsc'
-voc_model: # the pdmodel file of vocoder static model
+voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
-voc_params: # the pdiparams file of vocoder static model 
+voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
 voc_sample_rate: 24000
 voc_predictor_conf:
-  use_gpu: True
+  device: 'gpu:3'  # set 'gpu:id' or 'cpu'
-  enable_mkldnn: True  
+  enable_mkldnn: False  
-  switch_ir_optim: True  
+  switch_ir_optim: False  
 ##################################################################
 #                            OTHERS                              #
 ##################################################################
 lang: 'zh'
 device: paddle.get_device()
--- a/paddlespeech/server/bin/paddlespeech_server.py
+++ b/paddlespeech/server/bin/paddlespeech_server.py
@ -20,7 +20,7 @@ from fastapi import FastAPI
 from ..executor import BaseExecutor
 from ..util import cli_server_register
 from ..util import stats_wrapper
-from paddlespeech.server.engine.engine_factory import EngineFactory
+from paddlespeech.server.engine.engine_pool import init_engine_pool
 from paddlespeech.server.restful.api import setup_router
 from paddlespeech.server.utils.config import get_config
@ -51,8 +51,10 @@ class ServerExecutor(BaseExecutor):
    def init(self, config) -> bool:
        """system initialization
        Args:
            config (CfgNode): config object
        Returns:
            bool: 
        """
@ -61,12 +63,7 @@ class ServerExecutor(BaseExecutor):
        api_router = setup_router(api_list)
        app.include_router(api_router)
-        # init engine
+        if not init_engine_pool(config):
        engine_pool = []
        for engine in config.engine_backend:
            engine_pool.append(EngineFactory.get_engine(engine_name=engine))
            if not engine_pool[-1].init(
                    config_file=config.engine_backend[engine]):
            return False
        return True
--- a/paddlespeech/server/conf/application.yaml
+++ b/paddlespeech/server/conf/application.yaml
@ -9,12 +9,17 @@ port: 8090
 ##################################################################
 #                     CONFIG FILE                                #
 ##################################################################
 # The engine_type of each speech task must match the type of that task's config file.
 # E.g.: if the engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml'
 # E.g.: if the engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml'
 #
 # add engine type (Options: python, inference) 
 engine_type:
-    asr: 'inference'
+    asr: 'python'
-    # tts: 'inference'
+    tts: 'python'
 # add engine backend type (Options: asr, tts) and config file here.
 # Adding a speech task to engine_backend means starting the service.
 engine_backend:
-    asr: 'conf/asr/asr_pd.yaml'
+    asr: 'conf/asr/asr.yaml'
-    #tts: 'conf/tts/tts_pd.yaml'
+    tts: 'conf/tts/tts.yaml'
--- a/paddlespeech/server/conf/asr/asr.yaml
+++ b/paddlespeech/server/conf/asr/asr.yaml
@ -5,3 +5,4 @@ cfg_path: # [optional]
 ckpt_path: # [optional]
 decode_method: 'attention_rescoring'
 force_yes: True
 device: 'gpu:3'  # set 'gpu:id' or 'cpu'
--- a/paddlespeech/server/conf/asr/asr_pd.yaml
+++ b/paddlespeech/server/conf/asr/asr_pd.yaml
@ -15,7 +15,7 @@ decode_method:
 force_yes: True
 am_predictor_conf:
-  use_gpu: True
+  device: 'gpu:3'  # set 'gpu:id' or 'cpu'
  enable_mkldnn: True
  switch_ir_optim: True
--- a/paddlespeech/server/conf/tts/tts.yaml
+++ b/paddlespeech/server/conf/tts/tts.yaml
@ -29,4 +29,4 @@ voc_stat:
 #                            OTHERS                              #
 ##################################################################
 lang: 'zh'
-device: paddle.get_device()
+device: 'gpu:3'  # set 'gpu:id' or 'cpu'
--- a/paddlespeech/server/conf/tts/tts_pd.yaml
+++ b/paddlespeech/server/conf/tts/tts_pd.yaml
@ -6,18 +6,18 @@
 # am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
 ##################################################################
 am: 'fastspeech2_csmsc'   
-am_model: # the pdmodel file of am static model
+am_model: # the pdmodel file of your am static model (XX.pdmodel)
-am_params: # the pdiparams file of am static model
+am_params: # the pdiparams file of your am static model (XX.pdiparams)
-am_sample_rate: 24000
+am_sample_rate: 24000    # must match the model
 phones_dict: 
 tones_dict: 
 speaker_dict: 
 spk_id: 0
 am_predictor_conf:
-  use_gpu: True
+  device: 'gpu:3'  # set 'gpu:id' or 'cpu'
-  enable_mkldnn: True
+  enable_mkldnn: False
-  switch_ir_optim: True
+  switch_ir_optim: False
 ##################################################################
@ -25,17 +25,16 @@ am_predictor_conf:
 # voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
 ##################################################################
 voc: 'pwgan_csmsc'
-voc_model: # the pdmodel file of vocoder static model
+voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
-voc_params: # the pdiparams file of vocoder static model 
+voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
-voc_sample_rate: 24000
+voc_sample_rate: 24000    # must match the model
 voc_predictor_conf:
-  use_gpu: True
+  device: 'gpu:3'   # set 'gpu:id' or 'cpu'
-  enable_mkldnn: True  
+  enable_mkldnn: False
-  switch_ir_optim: True  
+  switch_ir_optim: False  
 ##################################################################
 #                            OTHERS                              #
 ##################################################################
 lang: 'zh'
 device: paddle.get_device()
--- a/paddlespeech/server/engine/asr/python/asr_engine.py
+++ b/paddlespeech/server/engine/asr/python/asr_engine.py
@ -12,21 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import io
 import os
 from typing import List
 from typing import Optional
 from typing import Union
 import librosa
 import paddle
 import soundfile
 from paddlespeech.cli.asr.infer import ASRExecutor
 from paddlespeech.cli.log import logger
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.transform.transformation import Transformation
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
 from paddlespeech.s2t.utils.utility import UpdateConfig
 from paddlespeech.server.engine.base_engine import BaseEngine
 from paddlespeech.server.utils.config import get_config
@ -63,7 +53,7 @@ class ASREngine(BaseEngine):
        self.executor = ASRServerExecutor()
        self.config = get_config(config_file)
-        paddle.set_device(paddle.get_device())
+        paddle.set_device(self.config.device)
        self.executor._init_from_path(
            self.config.model, self.config.lang, self.config.sample_rate,
            self.config.cfg_path, self.config.decode_method,
--- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py
+++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py
@ -344,7 +344,6 @@ class TTSEngine(BaseEngine):
        try:
            self.config = get_config(config_file)
            self.executor._init_from_path(
                am=self.config.am,
                am_model=self.config.am_model,
--- a/paddlespeech/server/restful/tts_api.py
+++ b/paddlespeech/server/restful/tts_api.py
@ -16,7 +16,7 @@ from typing import Union
 from fastapi import APIRouter
-from paddlespeech.server.engine.tts.paddleinference.tts_engine import TTSEngine
+from paddlespeech.server.engine.engine_pool import get_engine_pool
 from paddlespeech.server.restful.request import TTSRequest
 from paddlespeech.server.restful.response import ErrorResponse
 from paddlespeech.server.restful.response import TTSResponse
@ -60,28 +60,41 @@ def tts(request_body: TTSRequest):
    Returns:
        json: [description]
    """
-    # json to dict 
+    # get params
-    item_dict = request_body.dict()
+    text = request_body.text
-    sentence = item_dict['text']
+    spk_id = request_body.spk_id
-    spk_id = item_dict['spk_id']
+    speed = request_body.speed
-    speed = item_dict['speed']
+    volume = request_body.volume
-    volume = item_dict['volume']
+    sample_rate = request_body.sample_rate
-    sample_rate = item_dict['sample_rate']
+    save_path = request_body.save_path
    save_path = item_dict['save_path']
    # Check parameters
-    if speed <=0 or speed > 3 or volume <=0 or volume > 3 or \
+    if speed <= 0 or speed > 3:
-        sample_rate not in [0, 16000, 8000] or \
+        return failed_response(
-        (save_path is not None and not save_path.endswith("pcm") and not save_path.endswith("wav")):
+            ErrorCode.SERVER_PARAM_ERR,
-        return failed_response(ErrorCode.SERVER_PARAM_ERR)
+            "invalid speed value, the value should be between 0 and 3.")
-
+    if volume <= 0 or volume > 3:
-    # single
+        return failed_response(
-    tts_engine = TTSEngine()
+            ErrorCode.SERVER_PARAM_ERR,
            "invalid volume value, the value should be between 0 and 3.")
    if sample_rate not in [0, 16000, 8000]:
        return failed_response(
            ErrorCode.SERVER_PARAM_ERR,
            "invalid sample_rate value, the choice of value is 0, 8000, 16000.")
    if save_path is not None and not save_path.endswith(
            "pcm") and not save_path.endswith("wav"):
        return failed_response(
            ErrorCode.SERVER_PARAM_ERR,
            "invalid save_path, saved audio formats support pcm and wav")
    # run
    try:
        # get single engine from engine pool
        engine_pool = get_engine_pool()
        tts_engine = engine_pool['tts']
        lang, target_sample_rate, wav_base64 = tts_engine.run(
-            sentence, spk_id, speed, volume, sample_rate, save_path)
+            text, spk_id, speed, volume, sample_rate, save_path)
        response = {
            "success": True,
--- a/paddlespeech/server/utils/paddle_predictor.py
+++ b/paddlespeech/server/utils/paddle_predictor.py
@ -41,8 +41,9 @@ def init_predictor(model_dir: Optional[os.PathLike]=None,
        config = Config(model_file, params_file)
    config.enable_memory_optim()
-    if predictor_conf["use_gpu"]:
+    if "gpu" in predictor_conf["device"]:
-        config.enable_use_gpu(1000, 0)
+        gpu_id = predictor_conf["device"].split(":")[-1]
        config.enable_use_gpu(1000, int(gpu_id))
    if predictor_conf["enable_mkldnn"]:
        config.enable_mkldnn()
    if predictor_conf["switch_ir_optim"]: