diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index c6588ce8..9c171c47 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -7,7 +7,7 @@ host: 0.0.0.0 port: 8090 # The task format in the engin_list is: _ -# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference', 'cls_python', 'cls_inference'] +# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference', 'cls_python', 'cls_inference', 'text_python', 'vector_python'] protocol: 'http' engine_list: ['asr_python', 'tts_python', 'cls_python', 'text_python', 'vector_python'] @@ -28,7 +28,6 @@ asr_python: force_yes: True device: # set 'gpu:id' or 'cpu' - ################### speech task: asr; engine_type: inference ####################### asr_inference: # model_type choices=['deepspeech2offline_aishell'] @@ -50,10 +49,11 @@ asr_inference: ################################### TTS ######################################### ################### speech task: tts; engine_type: python ####################### -tts_python: - # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', - # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', - # 'fastspeech2_vctk'] +tts_python: + # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', + # 'fastspeech2_ljspeech', 'fastspeech2_aishell3', + # 'fastspeech2_vctk', 'fastspeech2_mix', + # 'tacotron2_csmsc', 'tacotron2_ljspeech'] am: 'fastspeech2_csmsc' am_config: am_ckpt: @@ -64,8 +64,10 @@ tts_python: spk_id: 0 # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', - # 'pwgan_vctk', 'mb_melgan_csmsc'] - voc: 'pwgan_csmsc' + # 'pwgan_vctk', 'mb_melgan_csmsc', 'style_melgan_csmsc', + # 'hifigan_csmsc', 'hifigan_ljspeech', 'hifigan_aishell3', + # 'hifigan_vctk', 'wavernn_csmsc'] + voc: 'mb_melgan_csmsc' voc_config: voc_ckpt: voc_stat: @@ -94,7 +96,7 @@ tts_inference: summary: True # False -> do not show predictor config # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] - voc: 'pwgan_csmsc' + voc: 'mb_melgan_csmsc' voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) voc_sample_rate: 24000 diff --git a/demos/streaming_tts_server/conf/tts_online_application.yaml b/demos/streaming_tts_server/conf/tts_online_application.yaml index 0460a5e1..e617912f 100644 --- a/demos/streaming_tts_server/conf/tts_online_application.yaml +++ b/demos/streaming_tts_server/conf/tts_online_application.yaml @@ -79,7 +79,7 @@ tts_online-onnx: # voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx'] # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference - voc: 'hifigan_csmsc_onnx' + voc: 'mb_melgan_csmsc_onnx' voc_ckpt: voc_sample_rate: 24000 voc_sess_conf: @@ -100,4 +100,4 @@ tts_online-onnx: voc_pad: 14 # voc_upsample should be same as n_shift on voc config. voc_upsample: 300 - + \ No newline at end of file diff --git a/demos/streaming_tts_server/conf/tts_online_ws_application.yaml b/demos/streaming_tts_server/conf/tts_online_ws_application.yaml index 146f06f1..329f882c 100644 --- a/demos/streaming_tts_server/conf/tts_online_ws_application.yaml +++ b/demos/streaming_tts_server/conf/tts_online_ws_application.yaml @@ -79,7 +79,7 @@ tts_online-onnx: # voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx'] # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference - voc: 'hifigan_csmsc_onnx' + voc: 'mb_melgan_csmsc_onnx' voc_ckpt: voc_sample_rate: 24000 voc_sess_conf: @@ -100,4 +100,4 @@ tts_online-onnx: voc_pad: 14 # voc_upsample should be same as n_shift on voc config. voc_upsample: 300 - + \ No newline at end of file