commit
24f0a7d44b
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,36 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import onnxruntime as ort
|
||||
|
||||
|
||||
def get_sess(model_path: Optional[os.PathLike]=None, sess_conf: dict=None):
    """Create an onnxruntime inference session.

    Args:
        model_path: path of the onnx model file handed to InferenceSession.
        sess_conf: dict with keys "device" ('cpu' or 'gpu:id'), "use_trt"
            (bool) and "cpu_threads" (int). Defaults to a single-threaded
            CPU session when omitted.

    Returns:
        ort.InferenceSession: the configured session.
    """
    if sess_conf is None:
        # The original indexed a None default and crashed with a TypeError;
        # fall back to a sensible CPU configuration instead.
        sess_conf = {"device": "cpu", "use_trt": False, "cpu_threads": 1}

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

    # Default to CPU so `providers` is always bound, even for an
    # unrecognized device string (the original left it unbound).
    providers = ['CPUExecutionProvider']
    if "gpu" in sess_conf["device"]:
        # fastspeech2/mb_melgan can't use trt now!
        if sess_conf["use_trt"]:
            providers = ['TensorrtExecutionProvider']
        else:
            providers = ['CUDAExecutionProvider']
    elif sess_conf["device"] == "cpu":
        providers = ['CPUExecutionProvider']
    sess_options.intra_op_num_threads = sess_conf["cpu_threads"]
    sess = ort.InferenceSession(
        model_path, providers=providers, sess_options=sess_options)
    return sess
|
@ -0,0 +1,99 @@
|
||||
#!/usr/bin/python
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import yaml
|
||||
|
||||
def change_value(args):
    """Modify one key of the server yaml config file in place.

    The original file is copied to a ``*_tmp.yaml`` sibling, parsed,
    modified according to ``args.change_type`` and written back; the
    temporary copy is removed afterwards.

    Supported change_type values:
      * model       -- set ``y[engine_type][target_key]``; an "_onnx" suffix
                       is appended to the value for the onnx engine.
      * protocol    -- set ``y["protocol"]`` to "http" or "websocket".
      * engine_type -- set ``y["engine_list"]`` to ``[target_value]``.
      * device      -- set the device of the currently selected engine.
    """
    yamlfile = args.config_file
    change_type = args.change_type
    engine_type = args.engine_type
    target_key = args.target_key
    target_value = args.target_value

    # Snapshot the original so it can be re-opened for writing below.
    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
    # Plain file copy instead of shelling out to `cp` (portable and immune
    # to shell injection through the file name).
    with open(yamlfile, "rb") as src, open(tmp_yamlfile, "wb") as dst:
        dst.write(src.read())

    with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
        y = yaml.safe_load(f)

        if change_type == "model":
            if engine_type == "tts_online-onnx":
                # Onnx engine models carry an "_onnx" suffix.
                target_value = target_value + "_onnx"
            y[engine_type][target_key] = target_value
        elif change_type == "protocol":
            assert (target_key == "protocol" and (
                target_value == "http" or target_value == "websocket"
            )), "if change_type is protocol, target_key must be set protocol."
            y[target_key] = target_value
        elif change_type == "engine_type":
            assert (
                target_key == "engine_list"
            ), "if change_type is engine_type, target_key must be set engine_list."
            y[target_key] = [target_value]
        elif change_type == "device":
            assert (
                target_key == "device"
            ), "if change_type is device, target_key must be set device."
            if y["engine_list"][0] == "tts_online":
                y["tts_online"]["device"] = target_value
            elif y["engine_list"][0] == "tts_online-onnx":
                # The onnx engine keeps one device per session config.
                y["tts_online-onnx"]["am_sess_conf"]["device"] = target_value
                y["tts_online-onnx"]["voc_sess_conf"]["device"] = target_value
            else:
                print(
                    "Error engine_list, please set tts_online or tts_online-onnx"
                )

        else:
            print("Error change_type, please set correct change_type.")

        print(yaml.dump(y, default_flow_style=False, sort_keys=False))
        yaml.dump(y, fw, allow_unicode=True)
    # Plain unlink instead of `os.system("rm ...")`.
    os.remove(tmp_yamlfile)
    print(f"Change key: {target_key} to value: {target_value} successfully.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line entry point: parse the requested config change and
    # apply it with change_value().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config_file',
        type=str,
        default='./conf/application.yaml',
        help='server yaml file.')
    parser.add_argument(
        '--change_type',
        type=str,
        default="model",
        choices=["model", "protocol", "engine_type", "device"],
        help='change protocol', )
    parser.add_argument(
        '--engine_type',
        type=str,
        default="tts_online",
        help='engine type',
        choices=["tts_online", "tts_online-onnx"])
    parser.add_argument(
        '--target_key',
        type=str,
        default=None,
        help='Change key',
        required=True)
    parser.add_argument(
        '--target_value',
        type=str,
        default=None,
        help='target value',
        required=True)

    args = parser.parse_args()

    change_value(args)
    # NOTE: a stale commented-out dispatch block (change_value/change_protocol)
    # that lived here as a dead triple-quoted string has been removed.
|
@ -0,0 +1,88 @@
|
||||
# This is the parameter configuration file for PaddleSpeech Serving.
|
||||
|
||||
#################################################################################
|
||||
# SERVER SETTING #
|
||||
#################################################################################
|
||||
host: 127.0.0.1
|
||||
port: 8092
|
||||
|
||||
# The task format in the engine_list is: <speech task>_<engine type>
|
||||
# task choices = ['tts_online', 'tts_online-onnx']
|
||||
# protocol = ['websocket', 'http'] (only one can be selected).
|
||||
protocol: 'http'
|
||||
engine_list: ['tts_online']
|
||||
|
||||
|
||||
#################################################################################
|
||||
# ENGINE CONFIG #
|
||||
#################################################################################
|
||||
|
||||
################################### TTS #########################################
|
||||
################### speech task: tts; engine_type: online #######################
|
||||
tts_online:
|
||||
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
|
||||
am: 'fastspeech2_cnndecoder_csmsc'
|
||||
am_config:
|
||||
am_ckpt:
|
||||
am_stat:
|
||||
phones_dict:
|
||||
tones_dict:
|
||||
speaker_dict:
|
||||
spk_id: 0
|
||||
|
||||
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
|
||||
voc: 'mb_melgan_csmsc'
|
||||
voc_config:
|
||||
voc_ckpt:
|
||||
voc_stat:
|
||||
|
||||
# others
|
||||
lang: 'zh'
|
||||
device: 'cpu' # set 'gpu:id' or 'cpu'
|
||||
am_block: 42
|
||||
am_pad: 12
|
||||
voc_block: 14
|
||||
voc_pad: 14
|
||||
|
||||
|
||||
|
||||
#################################################################################
|
||||
# ENGINE CONFIG #
|
||||
#################################################################################
|
||||
|
||||
################################### TTS #########################################
|
||||
################### speech task: tts; engine_type: online-onnx #######################
|
||||
tts_online-onnx:
|
||||
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
|
||||
am: 'fastspeech2_cnndecoder_csmsc_onnx'
|
||||
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
|
||||
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
|
||||
am_ckpt: # list
|
||||
am_stat:
|
||||
phones_dict:
|
||||
tones_dict:
|
||||
speaker_dict:
|
||||
spk_id: 0
|
||||
am_sample_rate: 24000
|
||||
am_sess_conf:
|
||||
device: "cpu" # set 'gpu:id' or 'cpu'
|
||||
use_trt: False
|
||||
cpu_threads: 1
|
||||
|
||||
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
|
||||
voc: 'mb_melgan_csmsc_onnx'
|
||||
voc_ckpt:
|
||||
voc_sample_rate: 24000
|
||||
voc_sess_conf:
|
||||
device: "cpu" # set 'gpu:id' or 'cpu'
|
||||
use_trt: False
|
||||
cpu_threads: 1
|
||||
|
||||
# others
|
||||
lang: 'zh'
|
||||
am_block: 42
|
||||
am_pad: 12
|
||||
voc_block: 14
|
||||
voc_pad: 14
|
||||
voc_upsample: 300
|
||||
|
@ -0,0 +1,100 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from paddlespeech.server.utils.audio_process import pcm2wav
|
||||
|
||||
|
||||
def save_audio(buffer, audio_path) -> bool:
    """Save raw pcm bytes to *audio_path* as a .pcm or .wav file.

    Args:
        buffer: raw pcm audio bytes (16-bit mono at 24 kHz when converting
            to wav).
        audio_path: destination path; must end with "pcm" or "wav".

    Returns:
        bool: True on success, False for an unsupported extension.
    """
    # Bug fix: the original ignored the ``audio_path`` parameter and wrote
    # to the module-global ``args.save_path`` instead.
    if audio_path.endswith("pcm"):
        with open(audio_path, "wb") as f:
            f.write(buffer)
    elif audio_path.endswith("wav"):
        # pcm2wav reads from a file, so stage the bytes in a temp file first.
        with open("./tmp.pcm", "wb") as f:
            f.write(buffer)
        pcm2wav("./tmp.pcm", audio_path, channels=1, bits=16, sample_rate=24000)
        os.remove("./tmp.pcm")  # plain unlink instead of shelling out to `rm`
    else:
        print("Only supports saved audio format is pcm or wav")
        return False

    return True
|
||||
|
||||
|
||||
def test(args):
    """Stream TTS audio over http and report latency statistics.

    Posts the synthesis request to the server's streaming endpoint, decodes
    the base64 chunks as they arrive, prints first-packet / final-packet
    latency and RTF, and optionally saves the audio via save_audio().
    """
    params = {
        "text": args.text,
        "spk_id": args.spk_id,
        "speed": args.speed,
        "volume": args.volume,
        "sample_rate": args.sample_rate,
        "save_path": ''
    }

    buffer = b''
    flag = 1  # true until the first audio chunk has arrived
    url = "http://" + str(args.server) + ":" + str(
        args.port) + "/paddlespeech/streaming/tts"
    st = time.time()
    html = requests.post(url, json.dumps(params), stream=True)
    for chunk in html.iter_content(chunk_size=1024):
        chunk = base64.b64decode(chunk)  # bytes
        if flag:
            # Time to the first audio packet ("首包响应").
            first_response = time.time() - st
            print(f"首包响应:{first_response} s")
            flag = 0
        buffer += chunk

    final_response = time.time() - st
    # 16-bit samples at 24 kHz -> seconds of audio (assumes mono pcm --
    # consistent with save_audio's pcm2wav parameters).
    duration = len(buffer) / 2.0 / 24000

    print(f"尾包响应:{final_response} s")
    print(f"音频时长:{duration} s")
    print(f"RTF: {final_response / duration}")

    if args.save_path is not None:
        if save_audio(buffer, args.save_path):
            print("音频保存至:", args.save_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line entry point of the streaming TTS http client.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text',
        type=str,
        default="您好,欢迎使用语音合成服务。",
        help='A sentence to be synthesized')
    parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
    parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
    parser.add_argument(
        '--volume', type=float, default=1.0, help='Audio volume')
    parser.add_argument(
        '--sample_rate',
        type=int,
        default=0,
        help='Sampling rate, the default is the same as the model')
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8092)
    parser.add_argument(
        "--save_path", type=str, help="save audio path", default=None)

    args = parser.parse_args()
    test(args)
|
@ -0,0 +1,315 @@
|
||||
#!/bin/bash
|
||||
# bash test.sh
|
||||
|
||||
# Launch paddlespeech_server in the background with $config_file and poll the
# error log until one more successful start banner appears, or a known
# startup error is detected. Writes the server PID to ./pid and sets the
# globals start_num, error_time and flag ("normal" on success, "unnormal"
# on failure).
StartService(){
    # Start service
    paddlespeech_server start --config_file $config_file 1>>$log/server.log 2>>$log/server.log.wf &
    echo $! > pid

    start_num=$(cat $log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
    flag="normal"
    # Wait until the number of recorded startups reaches $target_start_num.
    while [[ $start_num -lt $target_start_num && $flag == "normal" ]]
    do
        start_num=$(cat $log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
        # start service failed
        if [ $(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c) -gt $error_time ];then
            echo "Service started failed." | tee -a $log/test_result.log
            error_time=$(cat $log/server.log.wf | grep -i "Failed to warm up on tts engine." -c)
            flag="unnormal"

        elif [ $(cat $log/server.log.wf | grep -i "AssertionError" -c) -gt $error_time ];then
            echo "Service started failed." | tee -a $log/test_result.log
            error_time+=$(cat $log/server.log.wf | grep -i "AssertionError" -c)
            flag="unnormal"
        fi
    done
}
|
||||
|
||||
# Run the http client three times against the running server; bumps the
# global http_test_times counter once per attempt.
ClientTest_http(){
    for ((i=1; i<=3;i++))
    do
        python http_client.py --save_path ./out_http.wav
        ((http_test_times+=1))
    done
}
|
||||
|
||||
# Run the websocket client three times against the running server; bumps the
# global ws_test_times counter once per attempt.
ClientTest_ws(){
    for ((i=1; i<=3;i++))
    do
        python ws_client.py
        ((ws_test_times+=1))
    done
}
|
||||
|
||||
# Compare the number of "200 OK" responses in the server log with the number
# of http client runs and log pass/fail for the current round ($info).
# Resets http_test_times to the observed success count for the next round.
GetTestResult_http() {
    # Determine if the test was successful
    http_response_success_time=$(cat $log/server.log | grep "200 OK" -c)
    if (( $http_response_success_time == $http_test_times )) ; then
        echo "Testing successfully. $info" | tee -a $log/test_result.log
    else
        echo "Testing failed. $info" | tee -a $log/test_result.log
    fi
    http_test_times=$http_response_success_time
}
|
||||
|
||||
# Compare the number of completed websocket audio transmissions in the error
# log with the number of ws client runs and log pass/fail for the current
# round ($info). Resets ws_test_times to the observed success count.
GetTestResult_ws() {
    # Determine if the test was successful
    ws_response_success_time=$(cat $log/server.log.wf | grep "Complete the transmission of audio streams" -c)
    if (( $ws_response_success_time == $ws_test_times )) ; then
        echo "Testing successfully. $info" | tee -a $log/test_result.log
    else
        echo "Testing failed. $info" | tee -a $log/test_result.log
    fi
    ws_test_times=$ws_response_success_time
}
|
||||
|
||||
|
||||
# ------------------------------- main -------------------------------
# Usage: bash test.sh <engine_type> <log_dir>
engine_type=$1
log=$2
mkdir -p $log
rm -rf $log/server.log.wf
rm -rf $log/server.log
rm -rf $log/test_result.log

config_file=./conf/application.yaml
server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')

echo "Service ip: $server_ip" | tee $log/test_result.log
echo "Service port: $port" | tee -a $log/test_result.log

# whether a process is listening on $port
pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
if [ "$pid" != "" ]; then
    echo "The port: $port is occupied, please change another port"
    exit
fi

target_start_num=0  # the number of start service
test_times=0        # The number of client test
error_time=0        # The number of error occurrences in the startup failure server.log.wf file

# RunRound <http|ws>: one full test round with the current config --
# start the server, run the matching client test, collect the result and
# shut the server down. Relies on the globals $info, $target_start_num,
# $start_num and $flag; the caller sets $info beforehand.
RunRound() {
    echo "$info" | tee -a $log/test_result.log
    ((target_start_num+=1))
    StartService

    if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
        echo "Service started successfully." | tee -a $log/test_result.log
        if [ "$1" == "http" ]; then
            ClientTest_http
        else
            ClientTest_ws
        fi
        echo "This round of testing is over." | tee -a $log/test_result.log

        if [ "$1" == "http" ]; then
            GetTestResult_http
        else
            GetTestResult_ws
        fi
    else
        echo "Service failed to start, no client test."
        target_start_num=$start_num
    fi

    kill -9 `cat pid`
    rm -rf pid
    sleep 2s
    echo "**************************************************************************************" | tee -a $log/test_result.log
}

# ----------------------------- http rounds -----------------------------
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc."
RunRound http

python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc # change voc: mb_melgan_csmsc -> hifigan_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc."
RunRound http

python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_csmsc # change am: fastspeech2_cnndecoder_csmsc -> fastspeech2_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_csmsc, voc: hifigan_csmsc."
RunRound http

python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc # change voc: hifigan_csmsc -> mb_melgan_csmsc
info="start server: engine: $engine_type, protocol: http, am: fastspeech2_csmsc, voc: mb_melgan_csmsc."
RunRound http

# --------------------------- websocket rounds ---------------------------
echo "********************************************* websocket **********************************************************"

python change_yaml.py --engine_type $engine_type --change_type protocol --target_key protocol --target_value websocket
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_csmsc, voc: mb_melgan_csmsc."
RunRound ws

python change_yaml.py --engine_type $engine_type --target_key voc --target_value hifigan_csmsc # change voc: mb_melgan_csmsc -> hifigan_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_csmsc, voc: hifigan_csmsc."
RunRound ws

python change_yaml.py --engine_type $engine_type --target_key am --target_value fastspeech2_cnndecoder_csmsc # change am: fastspeech2_csmsc -> fastspeech2_cnndecoder_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: hifigan_csmsc."
RunRound ws

python change_yaml.py --engine_type $engine_type --target_key voc --target_value mb_melgan_csmsc # change voc: hifigan_csmsc -> mb_melgan_csmsc
info="start server: engine: $engine_type, protocol: websocket, am: fastspeech2_cnndecoder_csmsc, voc: mb_melgan_csmsc."
RunRound ws

echo "All tests completed." | tee -a $log/test_result.log

# show all the test results
echo "***************** Here are all the test results ********************"
cat $log/test_result.log

# Restoring conf is the same as demos/speech_server
cp ./tts_online_application.yaml ./conf/application.yaml -rf
sleep 2s
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# bash test_all.sh
# Run the full test matrix: {tts_online, tts_online-onnx} x {cpu, gpu},
# switching the server config between rounds with change_yaml.py.
# (test.sh restores the default config when it finishes.)

log_all_dir=./log

# engine: tts_online, device: cpu (default config)
bash test.sh tts_online $log_all_dir/log_tts_online_cpu

# engine: tts_online-onnx, device: cpu
python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_cpu


# engine: tts_online, device: gpu
python change_yaml.py --change_type device --target_key device --target_value gpu:3
bash test.sh tts_online $log_all_dir/log_tts_online_gpu

# engine: tts_online-onnx, device: gpu
python change_yaml.py --change_type engine_type --target_key engine_list --target_value tts_online-onnx
python change_yaml.py --change_type device --target_key device --target_value gpu:3
bash test.sh tts_online-onnx $log_all_dir/log_tts_online-onnx_gpu

echo "************************************** show all test results ****************************************"
cat $log_all_dir/log_tts_online_cpu/test_result.log
cat $log_all_dir/log_tts_online-onnx_cpu/test_result.log
cat $log_all_dir/log_tts_online_gpu/test_result.log
cat $log_all_dir/log_tts_online-onnx_gpu/test_result.log
@ -0,0 +1,88 @@
|
||||
# This is the parameter configuration file for PaddleSpeech Serving.
|
||||
|
||||
#################################################################################
|
||||
# SERVER SETTING #
|
||||
#################################################################################
|
||||
host: 127.0.0.1
|
||||
port: 8092
|
||||
|
||||
# The task format in the engine_list is: <speech task>_<engine type>
|
||||
# task choices = ['tts_online', 'tts_online-onnx']
|
||||
# protocol = ['websocket', 'http'] (only one can be selected).
|
||||
protocol: 'http'
|
||||
engine_list: ['tts_online']
|
||||
|
||||
|
||||
#################################################################################
|
||||
# ENGINE CONFIG #
|
||||
#################################################################################
|
||||
|
||||
################################### TTS #########################################
|
||||
################### speech task: tts; engine_type: online #######################
|
||||
tts_online:
|
||||
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
|
||||
am: 'fastspeech2_cnndecoder_csmsc'
|
||||
am_config:
|
||||
am_ckpt:
|
||||
am_stat:
|
||||
phones_dict:
|
||||
tones_dict:
|
||||
speaker_dict:
|
||||
spk_id: 0
|
||||
|
||||
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
|
||||
voc: 'mb_melgan_csmsc'
|
||||
voc_config:
|
||||
voc_ckpt:
|
||||
voc_stat:
|
||||
|
||||
# others
|
||||
lang: 'zh'
|
||||
device: 'cpu' # set 'gpu:id' or 'cpu'
|
||||
am_block: 42
|
||||
am_pad: 12
|
||||
voc_block: 14
|
||||
voc_pad: 14
|
||||
|
||||
|
||||
|
||||
#################################################################################
|
||||
# ENGINE CONFIG #
|
||||
#################################################################################
|
||||
|
||||
################################### TTS #########################################
|
||||
################### speech task: tts; engine_type: online-onnx #######################
|
||||
tts_online-onnx:
|
||||
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
|
||||
am: 'fastspeech2_cnndecoder_csmsc_onnx'
|
||||
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
|
||||
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
|
||||
am_ckpt: # list
|
||||
am_stat:
|
||||
phones_dict:
|
||||
tones_dict:
|
||||
speaker_dict:
|
||||
spk_id: 0
|
||||
am_sample_rate: 24000
|
||||
am_sess_conf:
|
||||
device: "cpu" # set 'gpu:id' or 'cpu'
|
||||
use_trt: False
|
||||
cpu_threads: 1
|
||||
|
||||
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
|
||||
voc: 'mb_melgan_csmsc_onnx'
|
||||
voc_ckpt:
|
||||
voc_sample_rate: 24000
|
||||
voc_sess_conf:
|
||||
device: "cpu" # set 'gpu:id' or 'cpu'
|
||||
use_trt: False
|
||||
cpu_threads: 1
|
||||
|
||||
# others
|
||||
lang: 'zh'
|
||||
am_block: 42
|
||||
am_pad: 12
|
||||
voc_block: 14
|
||||
voc_pad: 14
|
||||
voc_upsample: 300
|
||||
|
@ -0,0 +1,126 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import _thread as thread
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import ssl
|
||||
import time
|
||||
|
||||
import websocket
|
||||
|
||||
flag = 1
|
||||
st = 0.0
|
||||
all_bytes = b''
|
||||
|
||||
|
||||
class WsParam(object):
    """Connection parameters for the streaming TTS websocket client."""

    def __init__(self, text, server="127.0.0.1", port=8090):
        self.text = text
        self.server = server
        self.port = port
        # Endpoint of the server's streaming TTS websocket handler.
        self.url = "ws://{}:{}/ws/tts".format(server, port)

    def create_url(self):
        """Return the websocket endpoint URL."""
        return self.url
|
||||
|
||||
|
||||
def on_message(ws, message):
    """Handle one streaming TTS message from the server.

    Each message is a JSON object with an "audio" field (base64-encoded pcm
    bytes) and a "status" field: 0 = session created, 1 = intermediate
    chunk, 2 = final chunk (all audio is flushed to ./out.pcm and the
    socket is closed). Latency statistics are accumulated in the module
    globals flag / st / all_bytes.
    """
    global flag
    global st
    global all_bytes

    try:
        message = json.loads(message)
        audio = message["audio"]
        audio = base64.b64decode(audio)  # bytes
        status = message["status"]
        all_bytes += audio

        if status == 0:
            print("create successfully.")
        elif status == 1:
            if flag:
                # First audio chunk: report time-to-first-packet.
                print(f"首包响应:{time.time() - st} s")
                flag = 0
        elif status == 2:
            final_response = time.time() - st
            # 16-bit samples at 24 kHz -> seconds of audio
            # (assumes mono pcm -- TODO confirm against the server).
            duration = len(all_bytes) / 2.0 / 24000
            print(f"尾包响应:{final_response} s")
            print(f"音频时长:{duration} s")
            print(f"RTF: {final_response / duration}")
            with open("./out.pcm", "wb") as f:
                f.write(all_bytes)
            print("ws is closed")
            ws.close()
        else:
            print("infer error")

    except Exception as e:
        print("receive msg,but parse exception:", e)
|
||||
|
||||
|
||||
# Callback invoked by websocket-client when an error occurs on the connection.
def on_error(ws, error):
    print("### error:", error)
|
||||
|
||||
|
||||
# 收到websocket关闭的处理
|
||||
def on_close(ws):
|
||||
print("### closed ###")
|
||||
|
||||
|
||||
# 收到websocket连接建立的处理
|
||||
def on_open(ws):
|
||||
def run(*args):
|
||||
global st
|
||||
text_base64 = str(
|
||||
base64.b64encode((wsParam.text).encode('utf-8')), "UTF8")
|
||||
d = {"text": text_base64}
|
||||
d = json.dumps(d)
|
||||
print("Start sending text data")
|
||||
st = time.time()
|
||||
ws.send(d)
|
||||
|
||||
thread.start_new_thread(run, ())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--text",
|
||||
type=str,
|
||||
help="A sentence to be synthesized",
|
||||
default="您好,欢迎使用语音合成服务。")
|
||||
parser.add_argument(
|
||||
"--server", type=str, help="server ip", default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, help="server port", default=8092)
|
||||
args = parser.parse_args()
|
||||
|
||||
print("***************************************")
|
||||
print("Server ip: ", args.server)
|
||||
print("Server port: ", args.port)
|
||||
print("Sentence to be synthesized: ", args.text)
|
||||
print("***************************************")
|
||||
|
||||
wsParam = WsParam(text=args.text, server=args.server, port=args.port)
|
||||
|
||||
websocket.enableTrace(False)
|
||||
wsUrl = wsParam.create_url()
|
||||
ws = websocket.WebSocketApp(
|
||||
wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
|
||||
ws.on_open = on_open
|
||||
ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
|
@ -0,0 +1,188 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from paddlespeech.server.utils.audio_process import pcm2wav
|
||||
from paddlespeech.t2s.exps.syn_utils import get_sentences
|
||||
|
||||
|
||||
def save_audio(buffer, audio_path) -> bool:
|
||||
if audio_path.endswith("pcm"):
|
||||
with open(audio_path, "wb") as f:
|
||||
f.write(buffer)
|
||||
elif audio_path.endswith("wav"):
|
||||
with open("./tmp.pcm", "wb") as f:
|
||||
f.write(buffer)
|
||||
pcm2wav("./tmp.pcm", audio_path, channels=1, bits=16, sample_rate=24000)
|
||||
os.system("rm ./tmp.pcm")
|
||||
else:
|
||||
print("Only supports saved audio format is pcm or wav")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def test(args, text, utt_id):
|
||||
params = {
|
||||
"text": text,
|
||||
"spk_id": args.spk_id,
|
||||
"speed": args.speed,
|
||||
"volume": args.volume,
|
||||
"sample_rate": args.sample_rate,
|
||||
"save_path": ''
|
||||
}
|
||||
|
||||
buffer = b''
|
||||
flag = 1
|
||||
url = "http://" + str(args.server) + ":" + str(
|
||||
args.port) + "/paddlespeech/streaming/tts"
|
||||
st = time.time()
|
||||
html = requests.post(url, json.dumps(params), stream=True)
|
||||
for chunk in html.iter_content(chunk_size=1024):
|
||||
chunk = base64.b64decode(chunk) # bytes
|
||||
if flag:
|
||||
first_response = time.time() - st
|
||||
print(f"首包响应:{first_response} s")
|
||||
flag = 0
|
||||
buffer += chunk
|
||||
|
||||
final_response = time.time() - st
|
||||
duration = len(buffer) / 2.0 / 24000
|
||||
|
||||
print(f"sentence: {text}")
|
||||
print(f"尾包响应:{final_response} s")
|
||||
print(f"音频时长:{duration} s")
|
||||
print(f"RTF: {final_response / duration}")
|
||||
|
||||
save_path = str(args.output_dir + "/" + utt_id + ".wav")
|
||||
save_audio(buffer, save_path)
|
||||
print("音频保存至:", save_path)
|
||||
|
||||
return first_response, final_response, duration
|
||||
|
||||
|
||||
def count_engine(logfile: str="./nohup.out"):
|
||||
"""For inference on the statistical engine side
|
||||
|
||||
Args:
|
||||
logfile (str, optional): server log. Defaults to "./nohup.out".
|
||||
"""
|
||||
first_response_list = []
|
||||
final_response_list = []
|
||||
duration_list = []
|
||||
|
||||
with open(logfile, "r") as f:
|
||||
for line in f.readlines():
|
||||
if "- first response time:" in line:
|
||||
first_response = float(line.splie(" ")[-2])
|
||||
first_response_list.append(first_response)
|
||||
elif "- final response time:" in line:
|
||||
final_response = float(line.splie(" ")[-2])
|
||||
final_response_list.append(final_response)
|
||||
elif "- The durations of audio is:" in line:
|
||||
duration = float(line.splie(" ")[-2])
|
||||
duration_list.append(duration)
|
||||
|
||||
assert (len(first_response_list) == len(final_response_list) and
|
||||
len(final_response_list) == len(duration_list))
|
||||
|
||||
avg_first_response = sum(first_response_list) / len(first_response_list)
|
||||
avg_final_response = sum(final_response_list) / len(final_response_list)
|
||||
avg_duration = sum(duration_list) / len(duration_list)
|
||||
RTF = sum(final_response_list) / sum(duration_list)
|
||||
|
||||
print(
|
||||
"************************* engine result ***************************************"
|
||||
)
|
||||
print(
|
||||
f"test num: {len(duration_list)}, avg first response: {avg_first_response} s, avg final response: {avg_final_response} s, avg duration: {avg_duration}, RTF: {RTF}"
|
||||
)
|
||||
print(
|
||||
f"min duration: {min(duration_list)} s, max duration: {max(duration_list)} s"
|
||||
)
|
||||
print(
|
||||
f"max first response: {max(first_response_list)} s, min first response: {min(first_response_list)} s"
|
||||
)
|
||||
print(
|
||||
f"max final response: {max(final_response_list)} s, min final response: {min(final_response_list)} s"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--text",
|
||||
type=str,
|
||||
default="../../../../../../paddlespeech/t2s/exps/csmsc_test.txt",
|
||||
help="text to synthesize, a 'utt_id sentence' pair per line")
|
||||
parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
|
||||
parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
|
||||
parser.add_argument(
|
||||
'--volume', type=float, default=1.0, help='Audio volume')
|
||||
parser.add_argument(
|
||||
'--sample_rate',
|
||||
type=int,
|
||||
default=0,
|
||||
help='Sampling rate, the default is the same as the model')
|
||||
parser.add_argument(
|
||||
"--server", type=str, help="server ip", default="127.0.0.1")
|
||||
parser.add_argument("--port", type=int, help="server port", default=8092)
|
||||
parser.add_argument(
|
||||
"--output_dir", type=str, default="./output", help="output dir")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
os.system("rm -rf %s" % (args.output_dir))
|
||||
os.mkdir(args.output_dir)
|
||||
|
||||
first_response_list = []
|
||||
final_response_list = []
|
||||
duration_list = []
|
||||
|
||||
sentences = get_sentences(text_file=args.text, lang="zh")
|
||||
for utt_id, sentence in sentences:
|
||||
first_response, final_response, duration = test(args, sentence, utt_id)
|
||||
first_response_list.append(first_response)
|
||||
final_response_list.append(final_response)
|
||||
duration_list.append(duration)
|
||||
|
||||
assert (len(first_response_list) == len(final_response_list) and
|
||||
len(final_response_list) == len(duration_list))
|
||||
|
||||
avg_first_response = sum(first_response_list) / len(first_response_list)
|
||||
avg_final_response = sum(final_response_list) / len(final_response_list)
|
||||
avg_duration = sum(duration_list) / len(duration_list)
|
||||
RTF = sum(final_response_list) / sum(duration_list)
|
||||
|
||||
print(
|
||||
"************************* server/client result ***************************************"
|
||||
)
|
||||
print(
|
||||
f"test num: {len(duration_list)}, avg first response: {avg_first_response} s, avg final response: {avg_final_response} s, avg duration: {avg_duration}, RTF: {RTF}"
|
||||
)
|
||||
print(
|
||||
f"min duration: {min(duration_list)} s, max duration: {max(duration_list)} s"
|
||||
)
|
||||
print(
|
||||
f"max first response: {max(first_response_list)} s, min first response: {min(first_response_list)} s"
|
||||
)
|
||||
print(
|
||||
f"max final response: {max(final_response_list)} s, min final response: {min(final_response_list)} s"
|
||||
)
|
Loading…
Reference in new issue