diff --git a/docs/requirements.txt b/docs/requirements.txt index 609f27925..d4e213873 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -32,12 +32,14 @@ pyworld>=0.2.12 recommonmark>=0.5.0 resampy sacrebleu +shutil sphinx sphinx-autobuild sphinx-markdown-tables sphinx_rtd_theme textgrid timer +ToJyutping typeguard webrtcvad websockets diff --git a/examples/other/mfa/README.md b/examples/other/mfa/README.md index d79c96c2a..b85dac4d9 100644 --- a/examples/other/mfa/README.md +++ b/examples/other/mfa/README.md @@ -9,7 +9,7 @@ If you want to get rhythm tags with duration through MFA tool, you may add flag Note that only CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA. # MFA for Cantonese language -First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence.zip) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle.zip) under `~/datasets/`. +First, download the datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-daily-use-sentence/) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-in-the-vehicle/) and place them under `~/datasets/`. 
Then, ```bash ./run_canton.sh diff --git a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py index 0da8f8911..bb09ab6f4 100644 --- a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py +++ b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py @@ -1,3 +1,4 @@ +import argparse import os import re import shutil @@ -34,7 +35,9 @@ if __name__ == "__main__": parser.add_argument( "--output_lexicon", type=str, help="Path to save lexicon.") parser.add_argument( - "--output_wavlabs", type=str, help="Path to save lexicon.") + "--output_wavlabs", + type=str, + help="Path of wavs and labs for MFA training.") parser.add_argument( "--inputs", type=str, diff --git a/setup.py b/setup.py index 76bc5be8d..0839d6d9e 100644 --- a/setup.py +++ b/setup.py @@ -67,6 +67,7 @@ base = [ "pyyaml", "paddleslim>=2.3.4", "paddleaudio>=1.1.0", + "ToJyutping", ] server = ["pattern_singleton", "websockets"] @@ -292,7 +293,8 @@ setup_info = dict( }, # Package info - packages=find_packages(include=['paddlespeech*'], exclude=['utils', 'third_party']), + packages=find_packages( + include=['paddlespeech*'], exclude=['utils', 'third_party']), zip_safe=True, classifiers=[ 'Development Status :: 5 - Production/Stable',