From 7939884c3f3289213684dd99b1ea5ad767b02abf Mon Sep 17 00:00:00 2001 From: WongLaw Date: Mon, 7 Nov 2022 06:28:19 +0000 Subject: [PATCH] Revised Rhythm label for MFA, test=tts --- examples/other/mfa/local/reorganize_baker.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/examples/other/mfa/local/reorganize_baker.py b/examples/other/mfa/local/reorganize_baker.py index 2d5ed5f02..0e0035bda 100644 --- a/examples/other/mfa/local/reorganize_baker.py +++ b/examples/other/mfa/local/reorganize_baker.py @@ -33,6 +33,22 @@ import librosa import soundfile as sf from tqdm import tqdm +repalce_dict = { + ";": "", + "。": "", + ":": "", + "—": "", + ")": "", + ",": "", + "“": "", + "(": "", + "、": "", + "…": "", + "!": "", + "?": "", + "”": "" +} + def get_transcripts(path: Union[str, Path]): transcripts = {} @@ -100,6 +116,7 @@ def reorganize_baker(root_dir: Union[str, Path], def insert_rhy(sentence_first, sentence_second): sub = '#' return_words = [] + sentence_first = sentence_first.translate(str.maketrans(repalce_dict)) rhy_idx = [substr.start() for substr in re.finditer(sub, sentence_first)] re_rhy_idx = [] sentence_first_ = sentence_first.replace("#1", "").replace(