Cantonese TTS MFA pipeline, test=tts

3 years ago · 28dc83f63a
parent ec7a87f827
commit 28dc83f63a
2 changed files with 8 additions and 1 deletion
--- a/examples/other/mfa/README.md
+++ b/examples/other/mfa/README.md
@@ -7,3 +7,10 @@ Run the following script to get started, for more detail, please see `run.sh`.
 # Rhythm tags for MFA
 If you want to get rhythm tags with duration through MFA tool, you may add flag `--rhy-with-duration` in the first two commands in `run.sh`
 Note that only CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA.
+
+# MFA for Cantonese language
+First, download the datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence.zip) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle.zip), and unzip them under `~/dataset/` (`run_canton.sh` reads the two unpacked directories from there).
+Then run:
+```bash
+./run_canton.sh
+```
--- a/examples/other/mfa/run_canton.sh
+++ b/examples/other/mfa/run_canton.sh
@@ -4,7 +4,7 @@ mkdir -p $EXP_DIR
 LEXICON_NAME='canton'
 if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then
    echo "generating lexicon and training data..."
-    python local/generate_canton_lexicon_wavlabs.py --output_lexicon "$EXP_DIR/$LEXICON_NAME.lexicon" --output_wavlabs "$EXP_DIR/$LEXICON_NAME"_wavlabs --inputs Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle
+    python local/generate_canton_lexicon_wavlabs.py --output_lexicon "$EXP_DIR/$LEXICON_NAME.lexicon" --output_wavlabs "$EXP_DIR/$LEXICON_NAME"_wavlabs --inputs ~/dataset/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence ~/dataset/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle
    echo "lexicon and training data done"
 fi