# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
am_block:42
am_block:72
am_pad:12
am_pad:12
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# When the voc model is mb_melgan_csmsc and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is mb_melgan_csmsc and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is hifigan_csmsc and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
# When the voc model is hifigan_csmsc and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
voc_block:14
voc_block:36
voc_pad:14
voc_pad:14
@ -91,12 +91,12 @@ tts_online-onnx:
lang:'zh'
lang:'zh'
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
am_block:42
am_block:72
am_pad:12
am_pad:12
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is hifigan_csmsc_onnx and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
# When the voc model is hifigan_csmsc_onnx and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
voc_block:14
voc_block:36
voc_pad:14
voc_pad:14
# voc_upsample should be the same as n_shift in the voc config.
# voc_upsample should be the same as n_shift in the voc config.
logger.info(f"Audio successfully saved in {output}")
else:
logger.error("Audio save failed.")
# Summarize streaming delay: when delay_time_list holds entries, log the
# packet counts, min/max/average delay and the delay rate; otherwise log
# that no delay occurred.
# NOTE(review): delay_time_list and receive_time_list are defined outside
# this fragment; presumably both are lists of floats in seconds — confirm.
if delay_time_list:
    logger.info(
        f"Delay situation: total number of packages: {len(receive_time_list)}, the number of delayed packets: {len(delay_time_list)}, minimum delay time: {min(delay_time_list)} s, maximum delay time: {max(delay_time_list)} s, average delay time: {sum(delay_time_list)/len(delay_time_list)} s, delay rate:{len(delay_time_list)/len(receive_time_list)}"
    )
else:
    logger.info("The sentence has no delay in streaming synthesis.")
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
am_block:42
am_block:72
am_pad:12
am_pad:12
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# When the voc model is mb_melgan_csmsc and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is mb_melgan_csmsc and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is hifigan_csmsc and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
# When the voc model is hifigan_csmsc and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
voc_block:14
voc_block:36
voc_pad:14
voc_pad:14
@ -91,12 +91,12 @@ tts_online-onnx:
lang:'zh'
lang:'zh'
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# am_block and am_pad apply only to the fastspeech2_cnndecoder_onnx model, for streaming am inference.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
# When am_pad is set to 12, the streaming synthesized audio is the same as the non-streaming synthesized audio.
am_block:42
am_block:72
am_pad:12
am_pad:12
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# voc_pad and voc_block are used by the voc model for streaming voc inference.
# When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, the streaming synthesized audio is the same as the non-streaming synthesized audio; voc_pad can be set as low as 7 and the streaming synthesized audio still sounds normal.
# When the voc model is hifigan_csmsc_onnx and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
# When the voc model is hifigan_csmsc_onnx and voc_pad is set to 20, the streaming synthesized audio is the same as the non-streaming synthesized audio; with voc_pad set to 14, the streaming synthesized audio sounds normal.
voc_block:14
voc_block:36
voc_pad:14
voc_pad:14
# voc_upsample should be the same as n_shift in the voc config.
# voc_upsample should be the same as n_shift in the voc config.
asserttaginpretrained_models,'The model "{}" you want to use has not been supported, please choose other models.\nThe support models includes:\n\t\t{}\n'.format(
asserttaginpretrained_models,'The model "{}" you want to use has not been supported, please choose other models.\nThe support models includes:\n\t\t{}\n'.format(
# Download and return the path to the pretrained resources for the current task.
"""
support_models=list(pretrained_models.keys())
asserttaginpretrained_models,'The model "{}" you want to use has not been supported, please choose other models.\nThe support models includes:\n\t\t{}\n'.format(
asserttaginpretrained_models,'The model "{}" you want to use has not been supported, please choose other models.\nThe support models includes:\n\t\t{}\n'.format(
print(f"Audio successfully saved in {args.output}")
else:
print("Audio save failed.")
# Summarize streaming delay: when delay_time_list holds entries, print the
# packet counts, min/max/average delay and the delay rate; otherwise print
# that no delay occurred.
# NOTE(review): delay_time_list and receive_time_list are defined outside
# this fragment; presumably both are lists of floats in seconds — confirm.
if delay_time_list:
    print(
        f"Delay situation: total number of packages: {len(receive_time_list)}, the number of delayed packets: {len(delay_time_list)}, minimum delay time: {min(delay_time_list)} s, maximum delay time: {max(delay_time_list)} s, average delay time: {sum(delay_time_list)/len(delay_time_list)} s, delay rate:{len(delay_time_list)/len(receive_time_list)}"
    )
else:
    print("The sentence has no delay in streaming synthesis.")
print(f"Audio successfully saved in {args.output}")
else:
print("Audio save failed.")
# Summarize streaming delay: when delay_time_list holds entries, print the
# packet counts, min/max/average delay and the delay rate; otherwise print
# that no delay occurred.
# NOTE(review): delay_time_list and receive_time_list are defined outside
# this fragment; presumably both are lists of floats in seconds — confirm.
if delay_time_list:
    print(
        f"Delay situation: total number of packages: {len(receive_time_list)}, the number of delayed packets: {len(delay_time_list)}, minimum delay time: {min(delay_time_list)} s, maximum delay time: {max(delay_time_list)} s, average delay time: {sum(delay_time_list)/len(delay_time_list)} s, delay rate:{len(delay_time_list)/len(receive_time_list)}"
    )
else:
    print("The sentence has no delay in streaming synthesis.")