commit ae7a73bc11
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-paddlespeech_server start --config_file ./conf/application.yaml
+paddlespeech_server start --config_file ./conf/application.yaml &> server.log &
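The change above backgrounds the demo server and sends its output to server.log instead of the terminal. A minimal check that the backgrounded server actually came up (a sketch; the wait time is arbitrary and the log name is the one used above):

sleep 10                 # give the engines time to download and load on first run
tail -n 20 server.log    # the log should show the server listening on its configured port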
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
+wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav
+
+# sid extract
+paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task spk --input ./85236145389.wav
+
+# sid score
+paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task score --enroll ./85236145389.wav --test ./123456789.wav
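The new script above downloads two sample wav files and calls the speaker-verification client twice: once to extract a speaker embedding (--task spk) and once to score the two recordings against each other (--task score). The same two calls with the server address factored into variables (a sketch; commands unchanged from the hunk):

#!/bin/bash
SERVER_IP=127.0.0.1
PORT=8090

# speaker embedding extraction for one utterance
paddlespeech_client vector --server_ip ${SERVER_IP} --port ${PORT} --task spk --input ./85236145389.wav

# similarity score between an enrollment utterance and a test utterance
paddlespeech_client vector --server_ip ${SERVER_IP} --port ${PORT} --task score --enroll ./85236145389.wav --test ./123456789.wav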
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+
+paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input 今天的天气真好啊你下午有空吗我想约你一起去吃饭
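The new script above sends an unpunctuated Chinese sentence (roughly: "The weather is really nice today. Are you free this afternoon? I would like to invite you out for a meal.") to the text server, which returns it with punctuation restored. The same call with the input quoted and kept in a variable (a sketch):

#!/bin/bash
TEXT="今天的天气真好啊你下午有空吗我想约你一起去吃饭"
paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input "${TEXT}"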
@@ -1,9 +1,8 @@
-export CUDA_VISIBLE_DEVICE=0,1,2,3
-export CUDA_VISIBLE_DEVICE=0,1,2,3
+#export CUDA_VISIBLE_DEVICE=0,1,2,3
 
-# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
+# nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
 paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log &
 
-# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
+# nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
 paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log &
 
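This hunk comments out the CUDA device export, points the commented-out nohup fallbacks at the scripts' new local/ location, and keeps both background servers (punctuation and streaming ASR) logging to punc.log and streaming_asr.log. A rough check that both came up (a sketch; the wait time is arbitrary):

sleep 20                        # model download and load can take a while on first run
tail -n 20 punc.log             # punctuation server
tail -n 20 streaming_asr.log    # streaming ASR server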
@@ -0,0 +1,103 @@
+# This is the parameter configuration file for streaming tts server.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8192
+
+# The task format in the engin_list is: <speech task>_<engine type>
+# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
+# protocol choices = ['websocket', 'http']
+protocol: 'websocket'
+engine_list: ['tts_online-onnx']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online #######################
+tts_online:
+    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
+    # fastspeech2_cnndecoder_csmsc support streaming am infer.
+    am: 'fastspeech2_csmsc'
+    am_config:
+    am_ckpt:
+    am_stat:
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    # voc (vocoder) choices=['mb_melgan_csmsc, hifigan_csmsc']
+    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
+    voc: 'mb_melgan_csmsc'
+    voc_config:
+    voc_ckpt:
+    voc_stat:
+
+    # others
+    lang: 'zh'
+    device: 'cpu' # set 'gpu:id' or 'cpu'
+    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
+    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
+    am_block: 72
+    am_pad: 12
+    # voc_pad and voc_block voc model to streaming voc infer,
+    # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
+    # when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    voc_block: 36
+    voc_pad: 14
+
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### TTS #########################################
+################### speech task: tts; engine_type: online-onnx #######################
+tts_online-onnx:
+    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
+    # fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.
+    am: 'fastspeech2_cnndecoder_csmsc_onnx'
+    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
+    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
+    am_ckpt: # list
+    am_stat:
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+    am_sample_rate: 24000
+    am_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 4
+
+    # voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx']
+    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
+    voc: 'hifigan_csmsc_onnx'
+    voc_ckpt:
+    voc_sample_rate: 24000
+    voc_sess_conf:
+        device: "cpu" # set 'gpu:id' or 'cpu'
+        use_trt: False
+        cpu_threads: 4
+
+    # others
+    lang: 'zh'
+    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
+    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
+    am_block: 72
+    am_pad: 12
+    # voc_pad and voc_block voc model to streaming voc infer,
+    # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
+    # when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
+    voc_block: 36
+    voc_pad: 14
+    # voc_upsample should be same as n_shift on voc config.
+    voc_upsample: 300
+
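The new config above registers only the tts_online-onnx engine, serves it over websocket on port 8192, and runs both ONNX sessions on CPU. Trying the same config on a GPU only requires changing the device entries; a sketch of doing that on a copy of the file (the file name and the sed edit are illustrative, not part of the commit):

cp ./conf/tts_online_application.yaml ./conf/tts_online_application.gpu.yaml
sed -i 's/"cpu"/"gpu:0"/g' ./conf/tts_online_application.gpu.yaml    # only touches the quoted onnx session devices
paddlespeech_server start --config_file ./conf/tts_online_application.gpu.yaml &> tts.gpu.log &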
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# http server
+paddlespeech_server start --config_file ./conf/tts_online_application.yaml &> tts.http.log &
+
+
+# websocket server
+paddlespeech_server start --config_file ./conf/tts_online_ws_application.yaml &> tts.ws.log &
+
+
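The new start script above brings up an HTTP instance and a websocket instance of the streaming TTS server, logging to tts.http.log and tts.ws.log. A sketch of querying one of them with PaddleSpeech's streaming TTS client; the port, protocol, and sample sentence are assumptions and must match the config the server was started with (the config shown above uses websocket on port 8192):

paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8192 --protocol websocket \
    --input "您好，欢迎使用语音合成服务。" --output output.wav    # sample text: "Hello, welcome to the speech synthesis service."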
@@ -1,3 +0,0 @@
-#!/bin/bash
-# start server
-paddlespeech_server start --config_file ./conf/tts_online_application.yaml
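The old foreground start script is removed in favor of the backgrounded version added above. Since every server in this commit is now detached with a redirect to a log file, one blunt way to stop them all when finished (a sketch; narrow the pattern if unrelated paddlespeech_server processes are running):

pkill -f "paddlespeech_server start"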