diff --git a/docs/requirements.txt b/docs/requirements.txt
index 609f27925..d4e213873 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -32,12 +32,14 @@ pyworld>=0.2.12
 recommonmark>=0.5.0
 resampy
 sacrebleu
+shutil
 sphinx
 sphinx-autobuild
 sphinx-markdown-tables
 sphinx_rtd_theme
 textgrid
 timer
+ToJyutping
 typeguard
 webrtcvad
 websockets
diff --git a/examples/other/mfa/README.md b/examples/other/mfa/README.md
index d79c96c2a..b85dac4d9 100644
--- a/examples/other/mfa/README.md
+++ b/examples/other/mfa/README.md
@@ -9,7 +9,7 @@ If you want to get rhythm tags with duration through MFA tool, you may add flag
 Note that only CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA.
 
 # MFA for Cantonese language
-First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence.zip) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://paddlespeech.bj.bcebos.com/datasets/Cantonese/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle.zip) under `~/datasets/`.
+First, download the [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-daily-use-sentence/) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-in-the-vehicle/) datasets into `~/datasets/`.
 Then,
 ```bash
 ./run_canton.sh
diff --git a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
index 0da8f8911..bb09ab6f4 100644
--- a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
+++ b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
@@ -1,3 +1,4 @@
+import argparse
 import os
 import re
 import shutil
@@ -34,7 +35,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--output_lexicon", type=str, help="Path to save lexicon.")
     parser.add_argument(
-        "--output_wavlabs", type=str, help="Path to save lexicon.")
+        "--output_wavlabs",
+        type=str,
+        help="Path of wavs and labs for MFA training.")
     parser.add_argument(
         "--inputs",
         type=str,
diff --git a/setup.py b/setup.py
index 76bc5be8d..0839d6d9e 100644
--- a/setup.py
+++ b/setup.py
@@ -67,6 +67,7 @@ base = [
     "pyyaml",
     "paddleslim>=2.3.4",
     "paddleaudio>=1.1.0",
+    "ToJyutping",
 ]
 
 server = ["pattern_singleton", "websockets"]
@@ -292,7 +293,8 @@ setup_info = dict(
     },
 
     # Package info
-    packages=find_packages(include=['paddlespeech*'], exclude=['utils', 'third_party']),
+    packages=find_packages(
+        include=['paddlespeech*'], exclude=['utils', 'third_party']),
     zip_safe=True,
     classifiers=[
         'Development Status :: 5 - Production/Stable',