Fix RTF reporting, fix inference input of SpeedySpeech, fix STFT module path (paddle.signal.stft) for Paddle 2.2.0

pull/935/head
TianYuan 3 years ago
parent 6dbcd7720d
commit 04bcb6a12d

@ -86,8 +86,9 @@ def main():
N += wav.size
T += t.elapse
speed = wav.size / t.elapse
rtf = config.fs / speed
print(
f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {config.fs / speed}."
f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
)
sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)
print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")

@ -86,8 +86,9 @@ def main():
N += wav.size
T += t.elapse
speed = wav.size / t.elapse
rtf = config.fs / speed
print(
f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {config.fs / speed}."
f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
)
sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)
print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")

@ -96,8 +96,8 @@ def main():
input_ids = frontend.get_input_ids(
sentence, merge_sentences=True, get_tone_ids=True)
phone_ids = input_ids["phone_ids"]
tone_ids = input_ids["tone_ids"]
phone_ids = input_ids["phone_ids"].numpy()
tone_ids = input_ids["tone_ids"].numpy()
phones = phone_ids[0]
tones = tone_ids[0]

@ -51,7 +51,7 @@ def stft(x,
# calculate window
window = signal.get_window(window, win_length, fftbins=True)
window = paddle.to_tensor(window)
x_stft = paddle.tensor.signal.stft(
x_stft = paddle.signal.stft(
x,
fft_size,
hop_length,

Loading…
Cancel
Save