From ec7a87f827abbb0db850faef4820ccf127a94e8e Mon Sep 17 00:00:00 2001
From: WongLaw
Date: Wed, 8 Feb 2023 10:14:05 +0000
Subject: [PATCH] Cantonese TTS MFA pipeline, test=tts

---
 .../local/generate_canton_lexicon_wavlabs.py | 90 +++++++++++++++++++
 examples/other/mfa/run_canton.sh             | 35 ++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
 create mode 100755 examples/other/mfa/run_canton.sh

diff --git a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
new file mode 100644
index 000000000..0da8f8911
--- /dev/null
+++ b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
@@ -0,0 +1,90 @@
+"""Generate an MFA lexicon and wav/lab pairs from Cantonese corpora."""
+import argparse
+import os
+import re
+import shutil
+
+import ToJyutping
+
+# Matches any ASCII letter; utterances containing one are skipped below.
+_ASCII_LETTER = re.compile(r'[A-Za-z]')
+
+
+def check(str):
+    """Return True if the text contains at least one ASCII letter."""
+    # NOTE: the parameter name shadows the builtin; kept for compatibility.
+    return _ASCII_LETTER.search(str) is not None
+
+
+consonants = [
+    'p', 'b', 't', 'd', 'ts', 'dz', 'k', 'g', 'kw', 'gw', 'f', 'h', 'l', 'm',
+    'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j'
+]
+
+
+def get_lines(canton):
+    """Return the lexicon entry "syllable initial final" for a syllable.
+
+    Syllables that start with none of the known consonants map to themselves.
+    """
+    # Try longer initials first; otherwise 'g' would always shadow 'gw',
+    # 'k' would shadow 'kw', 't' would shadow 'ts' and 'd' would shadow 'dz'.
+    for consonant in sorted(consonants, key=len, reverse=True):
+        if canton.startswith(consonant):
+            c, v = canton[:len(consonant)], canton[len(consonant):]
+            return canton + ' ' + c + ' ' + v
+    return canton + ' ' + canton
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generate lexicon for Cantonese pinyin to phoneme for MFA")
+    parser.add_argument(
+        "--output_lexicon", type=str, help="Path to save lexicon.")
+    parser.add_argument(
+        "--output_wavlabs", type=str, help="Path to save wav and lab files.")
+    # No custom dest: the values are read back as args.inputs below.
+    parser.add_argument(
+        "--inputs",
+        type=str,
+        nargs="+",
+        help="Path to the cantonese datasets.")
+    args = parser.parse_args()
+
+    # Tolerate an existing directory so an interrupted run can be repeated.
+    os.makedirs(args.output_wavlabs, exist_ok=True)
+
+    all_canton = set()
+    for input_dir in args.inputs:
+        # Keep the dataset directory separate from the index file path; the
+        # wav files are looked up under DATASET/WAV/SPEAKER/.
+        info_path = os.path.join(input_dir, "UTTERANCEINFO.txt")
+        with open(info_path, 'r') as f:
+            # Drop the first line (presumably a column header).
+            utterance_info = f.readlines()[1:]
+
+        for utterance_line in utterance_info:
+            if not utterance_line.strip():
+                continue
+            _, wav_name, spk, _, text = utterance_line.split('\t')
+            text = text.strip().replace(' ', '')
+            # check the characters and drop the short text.
+            if check(text) or len(text) <= 2:
+                continue
+
+            source_path = os.path.join(input_dir, 'WAV', spk, wav_name)
+            target_path = os.path.join(args.output_wavlabs, wav_name)
+            shutil.copy(source_path, target_path)
+
+            # splitext keeps dots inside the stem so wav and lab names pair up.
+            lab_name = os.path.splitext(wav_name)[0] + '.lab'
+            lab_target_path = os.path.join(args.output_wavlabs, lab_name)
+            canton_text = ToJyutping.get_jyutping_text(text)
+            with open(lab_target_path, 'w') as f:
+                f.write(canton_text)
+
+            # split() without arguments never yields empty tokens.
+            all_canton.update(canton_text.split())
+
+    # Sorted so that repeated runs produce an identical lexicon file.
+    with open(args.output_lexicon, 'w') as f:
+        for canton in sorted(all_canton):
+            f.write(get_lines(canton) + '\n')
diff --git a/examples/other/mfa/run_canton.sh b/examples/other/mfa/run_canton.sh
new file mode 100755
index 000000000..79bf3387f
--- /dev/null
+++ b/examples/other/mfa/run_canton.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+EXP_DIR=exp
+
+mkdir -p $EXP_DIR
+LEXICON_NAME='canton'
+if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then
+    echo "generating lexicon and training data..."
+    python local/generate_canton_lexicon_wavlabs.py --output_lexicon "$EXP_DIR/$LEXICON_NAME.lexicon" --output_wavlabs "$EXP_DIR/$LEXICON_NAME"_wavlabs --inputs Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle
+    echo "lexicon and training data done"
+fi
+
+
+MFA_DOWNLOAD_DIR=local/
+
+if [ ! -f "$MFA_DOWNLOAD_DIR/montreal-forced-aligner_linux.tar.gz" ]; then
+    echo "downloading mfa..."
+    (cd $MFA_DOWNLOAD_DIR && wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz)
+    echo "download mfa done!"
+fi
+
+if [ ! -d "$MFA_DOWNLOAD_DIR/montreal-forced-aligner" ]; then
+    echo "extracting mfa..."
+    (cd $MFA_DOWNLOAD_DIR && tar xvf "montreal-forced-aligner_linux.tar.gz")
+    echo "extraction done!"
+fi
+
+# Prepend instead of replacing PATH so that system tools stay reachable.
+export PATH="$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin:$PATH"
+if [ ! -d "$EXP_DIR/canton_alignment" ]; then
+    echo "Start MFA training..."
+    mfa_train_and_align "$EXP_DIR/$LEXICON_NAME"_wavlabs "$EXP_DIR/$LEXICON_NAME.lexicon" $EXP_DIR/canton_alignment -o $EXP_DIR/canton_model --clean --verbose --temp_directory $EXP_DIR/.mfa_train_and_align
+    echo "training done!"
+    echo "results: $EXP_DIR/canton_alignment"
+    echo "model: $EXP_DIR/canton_model"
+fi