Cantonese TTS MFA pipeline, test=tts

3 years ago · cfa5a5021f
parent d9df9b05ca
commit cfa5a5021f
4 changed files with 10 additions and 3 deletions
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -32,12 +32,14 @@ pyworld>=0.2.12
 recommonmark>=0.5.0
 resampy
 sacrebleu
 shutil
 sphinx
 sphinx-autobuild
 sphinx-markdown-tables
 sphinx_rtd_theme
 textgrid
 timer
 ToJyutping
 typeguard
 webrtcvad
 websockets
--- a/examples/other/mfa/README.md
+++ b/examples/other/mfa/README.md
@@ -9,7 +9,7 @@ If you want to get rhythm tags with duration through MFA tool, you may add flag
 Note that only CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA.
 # MFA for Cantonese language
-First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence.zip) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle.zip) under `~/datasets/`.
+First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-daily-use-sentence/) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-in-the-vehicle/) under `~/datasets/`.
 Then,
 ```bash
 ./run_canton.sh
--- a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
+++ b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
@@ -1,3 +1,4 @@
 import argparse
 import os
 import re
 import shutil
@@ -34,7 +35,9 @@ if __name__ == "__main__":
    parser.add_argument(
        "--output_lexicon", type=str, help="Path to save lexicon.")
    parser.add_argument(
-        "--output_wavlabs", type=str, help="Path to save lexicon.")
+        "--output_wavlabs",
+        type=str,
+        help="Path of wavs and labs for MFA training.")
    parser.add_argument(
        "--inputs",
        type=str,
--- a/setup.py
+++ b/setup.py
@@ -67,6 +67,7 @@ base = [
    "pyyaml",
    "paddleslim>=2.3.4",
    "paddleaudio>=1.1.0",
    "ToJyutping",
 ]
 server = ["pattern_singleton", "websockets"]
@@ -292,7 +293,8 @@ setup_info = dict(
    },
    # Package info
-    packages=find_packages(include=['paddlespeech*'], exclude=['utils', 'third_party']),
+    packages=find_packages(
+        include=['paddlespeech*'], exclude=['utils', 'third_party']),
    zip_safe=True,
    classifiers=[
        'Development Status :: 5 - Production/Stable',