From d9df9b05ca3dec21d1ad52e92b5258f604d8737f Mon Sep 17 00:00:00 2001 From: WongLaw Date: Wed, 8 Feb 2023 12:24:29 +0000 Subject: [PATCH] Cantonese TTS MFA pipeline, test=tts --- examples/other/mfa/README.md | 2 +- examples/other/mfa/run_canton.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/other/mfa/README.md b/examples/other/mfa/README.md index 88054019b..d79c96c2a 100644 --- a/examples/other/mfa/README.md +++ b/examples/other/mfa/README.md @@ -9,7 +9,7 @@ If you want to get rhythm tags with duration through MFA tool, you may add flag Note that only CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA. # MFA for Cantonese language -First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence.zip) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle.zip) under `~/dataset/`. +First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence.zip) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle.zip) under `~/datasets/`. Then, ```bash ./run_canton.sh diff --git a/examples/other/mfa/run_canton.sh b/examples/other/mfa/run_canton.sh index 69c113276..cef6a2f04 100755 --- a/examples/other/mfa/run_canton.sh +++ b/examples/other/mfa/run_canton.sh @@ -4,7 +4,7 @@ mkdir -p $EXP_DIR LEXICON_NAME='canton' if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then echo "generating lexicon and training data..." - python local/generate_canton_lexicon_wavlabs.py --output_lexicon "$EXP_DIR/$LEXICON_NAME.lexicon" --output_wavlabs "$EXP_DIR/$LEXICON_NAME"_wavlabs --inputs ~/dataset/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence ~/dataset/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle + python local/generate_canton_lexicon_wavlabs.py --output_lexicon "$EXP_DIR/$LEXICON_NAME.lexicon" --output_wavlabs "$EXP_DIR/$LEXICON_NAME"_wavlabs --inputs ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle echo "lexicon and training data done" fi