You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/tests/unit/server/online/tts/test_server/test_http_client.py

153 lines
6.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import asyncio
import os
from paddlespeech.server.utils.util import compute_delay
from paddlespeech.t2s.exps.syn_utils import get_sentences
def test(args, text, utt_id):
output = str(args.output_dir + "/" + utt_id + ".wav")
if args.protocol == "http":
print("tts http client start")
from paddlespeech.server.utils.audio_handler import TTSHttpHandler
handler = TTSHttpHandler(args.server_ip, args.port, args.play)
first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = handler.run(
text, args.spk_id, args.speed, args.volume, args.sample_rate,
output)
elif args.protocol == "websocket":
from paddlespeech.server.utils.audio_handler import TTSWsHandler
print("tts websocket client start")
handler = TTSWsHandler(args.server_ip, args.port, args.play)
loop = asyncio.get_event_loop()
first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = loop.run_until_complete(
handler.run(text, output))
else:
print("Please set correct protocol, http or websocket")
return first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--text",
type=str,
default="../../../../../../paddlespeech/t2s/exps/csmsc_test.txt",
help="text to synthesize, a 'utt_id sentence' pair per line")
parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
parser.add_argument(
'--volume', type=float, default=1.0, help='Audio volume')
parser.add_argument(
'--sample_rate',
type=int,
default=0,
help='Sampling rate, the default is the same as the model')
parser.add_argument(
"--server_ip", type=str, help="server ip", default="127.0.0.1")
parser.add_argument("--port", type=int, help="server port", default=8092)
parser.add_argument(
"--protocol",
type=str,
choices=['http', 'websocket'],
help="server protocol",
default="http")
parser.add_argument(
"--output_dir", type=str, default="./output", help="output dir")
parser.add_argument(
"--play", type=bool, help="whether to play audio", default=False)
args = parser.parse_args()
if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
first_response_list = []
final_response_list = []
duration_list = []
all_delay_list = []
packet_count = 0.0
sentences = get_sentences(text_file=args.text, lang="zh")
for utt_id, sentence in sentences:
first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = test(
args, sentence, utt_id)
delay_time_list = compute_delay(receive_time_list, chunk_duration_list)
first_response_list.append(first_response)
final_response_list.append(final_response)
duration_list.append(duration)
packet_count += len(receive_time_list)
print(f"句子:{sentence}")
print(f"首包响应时间:{first_response} s")
print(f"尾包响应时间:{final_response} s")
print(f"音频时长:{duration} s")
print(f"该句RTF{final_response/duration}")
if delay_time_list != []:
for t in delay_time_list:
all_delay_list.append(t)
print(
f"该句流式合成的延迟情况:总包个数:{len(receive_time_list)},延迟包个数:{len(delay_time_list)}, 最小延迟时间:{min(delay_time_list)} s, 最大延迟时间:{max(delay_time_list)} s, 平均延迟时间:{sum(delay_time_list)/len(delay_time_list)} s, 延迟率:{len(delay_time_list)/len(receive_time_list)}"
)
else:
print("该句流式合成无延迟情况")
packet_count += len(receive_time_list)
assert (len(first_response_list) == len(final_response_list) and
len(final_response_list) == len(duration_list))
avg_first_response = sum(first_response_list) / len(first_response_list)
avg_final_response = sum(final_response_list) / len(final_response_list)
avg_duration = sum(duration_list) / len(duration_list)
RTF = sum(final_response_list) / sum(duration_list)
if all_delay_list != []:
delay_count = len(all_delay_list)
avg_delay = sum(all_delay_list) / len(all_delay_list)
delay_ratio = len(all_delay_list) / packet_count
min_delay = min(all_delay_list)
max_delay = max(all_delay_list)
else:
delay_count = 0.0
avg_delay = 0.0
delay_ratio = 0.0
min_delay = 0.0
max_delay = 0.0
print(
"************************* server/client result ***************************************"
)
print(
f"test num: {len(duration_list)}, avg first response: {avg_first_response} s, avg final response: {avg_final_response} s, avg duration: {avg_duration}, RTF: {RTF}."
)
print(
f"test num: {len(duration_list)}, packet count: {packet_count}, delay count: {delay_count}, avg delay time: {avg_delay} s, delay ratio: {delay_ratio} "
)
print(
f"min duration: {min(duration_list)} s, max duration: {max(duration_list)} s"
)
print(
f"min first response: {min(first_response_list)} s, max first response: {max(first_response_list)} s."
)
print(
f"min final response: {min(final_response_list)} s, max final response: {max(final_response_list)} s."
)
print(f"min delay: {min_delay} s, max delay: {max_delay}")