From 10b00b4da7d2d79418527417af8cc8bf9ac72cbc Mon Sep 17 00:00:00 2001 From: lizi <49679880@qq.com> Date: Fri, 29 Apr 2022 16:19:26 +0800 Subject: [PATCH 1/2] fix the reorganize_aishell3 trouble now it can generate lab files of audio files under training classification --- .../other/mfa/local/reorganize_aishell3.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/other/mfa/local/reorganize_aishell3.py b/examples/other/mfa/local/reorganize_aishell3.py index 0ad306626..a97ee29ed 100644 --- a/examples/other/mfa/local/reorganize_aishell3.py +++ b/examples/other/mfa/local/reorganize_aishell3.py @@ -46,22 +46,22 @@ def write_lab(root_dir: Union[str, Path], text_path = root_dir / sub_set / 'content.txt' new_dir = output_dir / sub_set - with open(text_path, 'r') as rf: - for line in rf: - wav_id, context = line.strip().split('\t') - spk_id = wav_id[:7] - transcript_name = wav_id.split('.')[0] + '.lab' - transcript_path = new_dir / spk_id / transcript_name - context_list = context.split() - word_list = context_list[0:-1:2] - pinyin_list = context_list[1::2] - wf = open(transcript_path, 'w') - if script_type == 'word': - # add space between chinese char - new_context = ' '.join(word_list) - elif script_type == 'pinyin': - new_context = ' '.join(pinyin_list) - wf.write(new_context + '\n') + with open(text_path, 'r') as rf: + for line in rf: + wav_id, context = line.strip().split('\t') + spk_id = wav_id[:7] + transcript_name = wav_id.split('.')[0] + '.lab' + transcript_path = new_dir / spk_id / transcript_name + context_list = context.split() + word_list = context_list[0:-1:2] + pinyin_list = context_list[1::2] + wf = open(transcript_path, 'w') + if script_type == 'word': + # add space between chinese char + new_context = ' '.join(word_list) + elif script_type == 'pinyin': + new_context = ' '.join(pinyin_list) + wf.write(new_context + '\n') def reorganize_aishell3(root_dir: Union[str, Path], From fba0693a208a2818ffea1a874562fe67733071f4 Mon Sep 17 00:00:00 2001 From: Jerryuhoo Date: Fri, 29 Apr 2022 17:43:50 +0800 Subject: [PATCH 2/2] fix random speaker embedding bug, test=tts --- paddlespeech/t2s/exps/voice_cloning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlespeech/t2s/exps/voice_cloning.py b/paddlespeech/t2s/exps/voice_cloning.py index 9257b07de..2742cd068 100644 --- a/paddlespeech/t2s/exps/voice_cloning.py +++ b/paddlespeech/t2s/exps/voice_cloning.py @@ -110,10 +110,10 @@ def voice_cloning(args): print(f"{utt_id} done!") # Randomly generate numbers of 0 ~ 0.2, 256 is the dim of spk_emb random_spk_emb = np.random.rand(256) * 0.2 - random_spk_emb = paddle.to_tensor(random_spk_emb) + random_spk_emb = paddle.to_tensor(random_spk_emb, dtype='float32') utt_id = "random_spk_emb" with paddle.no_grad(): - wav = voc_inference(am_inference(phone_ids, spk_emb=spk_emb)) + wav = voc_inference(am_inference(phone_ids, spk_emb=random_spk_emb)) sf.write( str(output_dir / (utt_id + ".wav")), wav.numpy(),