|
|
|
@ -271,7 +271,7 @@ class SpeechCollator():
|
|
|
|
|
utts.append(utt)
|
|
|
|
|
# audio
|
|
|
|
|
audios.append(audio) # [T, D]
|
|
|
|
|
audio_lens.append(audio.shape[1])
|
|
|
|
|
audio_lens.append(audio.shape[0])
|
|
|
|
|
# text
|
|
|
|
|
# for training, text is token ids
|
|
|
|
|
# else text is string, convert to unicode ord
|
|
|
|
|