diff --git a/examples/other/mfa/local/reorganize_aishell3.py b/examples/other/mfa/local/reorganize_aishell3.py index 0ad30662..a97ee29e 100644 --- a/examples/other/mfa/local/reorganize_aishell3.py +++ b/examples/other/mfa/local/reorganize_aishell3.py @@ -46,22 +46,22 @@ def write_lab(root_dir: Union[str, Path], text_path = root_dir / sub_set / 'content.txt' new_dir = output_dir / sub_set - with open(text_path, 'r') as rf: - for line in rf: - wav_id, context = line.strip().split('\t') - spk_id = wav_id[:7] - transcript_name = wav_id.split('.')[0] + '.lab' - transcript_path = new_dir / spk_id / transcript_name - context_list = context.split() - word_list = context_list[0:-1:2] - pinyin_list = context_list[1::2] - wf = open(transcript_path, 'w') - if script_type == 'word': - # add space between chinese char - new_context = ' '.join(word_list) - elif script_type == 'pinyin': - new_context = ' '.join(pinyin_list) - wf.write(new_context + '\n') + with open(text_path, 'r') as rf: + for line in rf: + wav_id, context = line.strip().split('\t') + spk_id = wav_id[:7] + transcript_name = wav_id.split('.')[0] + '.lab' + transcript_path = new_dir / spk_id / transcript_name + context_list = context.split() + word_list = context_list[0:-1:2] + pinyin_list = context_list[1::2] + wf = open(transcript_path, 'w') + if script_type == 'word': + # add space between chinese char + new_context = ' '.join(word_list) + elif script_type == 'pinyin': + new_context = ' '.join(pinyin_list) + wf.write(new_context + '\n') def reorganize_aishell3(root_dir: Union[str, Path],