diff --git a/examples/canton/tts3/README.md b/examples/canton/tts3/README.md index 10c36e227..f4acd131c 100644 --- a/examples/canton/tts3/README.md +++ b/examples/canton/tts3/README.md @@ -14,7 +14,7 @@ cp -r ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle/WAV/* ~/d After that, it should be look like: ``` -~/datasets/canton_all_ +~/datasets/canton_all │ └── WAV │ └──G0001 │ └──G0002 @@ -72,7 +72,7 @@ The dataset is split into 3 parts, namely `train`, `dev`, and` test`, each of wh Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, a path of energy features, speaker, and id of each utterance. -### Training details can refer to the script of examples/aishell3/tts3. +### For training details, refer to the script of [examples/aishell3/tts3](../../aishell3/tts3). ## Pretrained Model(Waiting========) Pretrained FastSpeech2 model with no silence in the edge of audios: diff --git a/examples/canton/tts3/conf/default.yaml b/examples/canton/tts3/conf/default.yaml index c1921b790..a101e6eea 100644 --- a/examples/canton/tts3/conf/default.yaml +++ b/examples/canton/tts3/conf/default.yaml @@ -16,6 +16,9 @@ fmax: 7600 # Maximum frequency of Mel basis. n_mels: 80 # The number of mel basis. # Only used for the model using pitch features (e.g. FastSpeech2) +# The canton datasets we use are different from others like Databaker or LJSpeech, +# so we set f0min to 110 to avoid too many zero-pitch problems. +# Reference: https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/issues/38 f0min: 110 # Minimum f0 for pitch extraction. f0max: 400 # Maximum f0 for pitch extraction. 
diff --git a/examples/canton/tts3/local/preprocess.sh b/examples/canton/tts3/local/preprocess.sh index a7afaa1ad..f70b1c028 100755 --- a/examples/canton/tts3/local/preprocess.sh +++ b/examples/canton/tts3/local/preprocess.sh @@ -1,6 +1,6 @@ #!/bin/bash -stage=1 +stage=0 stop_stage=100 config_path=$1 diff --git a/examples/canton/tts3/local/synthesize.sh b/examples/canton/tts3/local/synthesize.sh deleted file mode 100755 index dbbe7fdac..000000000 --- a/examples/canton/tts3/local/synthesize.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -config_path=$1 -train_output_path=$2 -ckpt_name=$3 - -stage=0 -stop_stage=0 - -# pwgan -if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - FLAGS_allocator_strategy=naive_best_fit \ - FLAGS_fraction_of_gpu_memory_to_use=0.01 \ - python3 ${BIN_DIR}/../synthesize.py \ - --am=fastspeech2_aishell3 \ - --am_config=${config_path} \ - --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ - --am_stat=dump/train/speech_stats.npy \ - --voc=pwgan_aishell3 \ - --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \ - --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ - --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ - --test_metadata=dump/test/norm/metadata.jsonl \ - --output_dir=${train_output_path}/test_new \ - --phones_dict=dump/phone_id_map.txt \ - --speaker_dict=dump/speaker_id_map.txt -fi - -# hifigan -if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - FLAGS_allocator_strategy=naive_best_fit \ - FLAGS_fraction_of_gpu_memory_to_use=0.01 \ - python3 ${BIN_DIR}/../synthesize.py \ - --am=fastspeech2_aishell3 \ - --am_config=${config_path} \ - --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ - --am_stat=dump/train/speech_stats.npy \ - --voc=hifigan_aishell3 \ - --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \ - --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ - --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ - --test_metadata=dump/test/norm/metadata.jsonl \ - 
--output_dir=${train_output_path}/test \ - --phones_dict=dump/phone_id_map.txt \ - --speaker_dict=dump/speaker_id_map.txt -fi - diff --git a/examples/canton/tts3/local/synthesize.sh b/examples/canton/tts3/local/synthesize.sh new file mode 120000 index 000000000..5f8bb91fc --- /dev/null +++ b/examples/canton/tts3/local/synthesize.sh @@ -0,0 +1 @@ +../../../csmsc/tts3/local/synthesize.sh \ No newline at end of file diff --git a/examples/canton/tts3/local/train.sh b/examples/canton/tts3/local/train.sh deleted file mode 100755 index 1da72f117..000000000 --- a/examples/canton/tts3/local/train.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -config_path=$1 -train_output_path=$2 - -python3 ${BIN_DIR}/train.py \ - --train-metadata=dump/train/norm/metadata.jsonl \ - --dev-metadata=dump/dev/norm/metadata.jsonl \ - --config=${config_path} \ - --output-dir=${train_output_path} \ - --ngpu=2 \ - --phones-dict=dump/phone_id_map.txt \ - --speaker-dict=dump/speaker_id_map.txt diff --git a/examples/canton/tts3/local/train.sh b/examples/canton/tts3/local/train.sh new file mode 120000 index 000000000..d7b05058e --- /dev/null +++ b/examples/canton/tts3/local/train.sh @@ -0,0 +1 @@ +../../../csmsc/tts3/local/train.sh \ No newline at end of file diff --git a/examples/canton/tts3/path.sh b/examples/canton/tts3/path.sh deleted file mode 100755 index fb7e8411c..000000000 --- a/examples/canton/tts3/path.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -export MAIN_ROOT=`realpath ${PWD}/../../../` - -export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} -export LC_ALL=C - -export PYTHONDONTWRITEBYTECODE=1 -# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C -export PYTHONIOENCODING=UTF-8 -export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} - -MODEL=fastspeech2 -export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL} diff --git a/examples/canton/tts3/path.sh b/examples/canton/tts3/path.sh new file mode 120000 index 000000000..4785b9095 --- /dev/null +++ b/examples/canton/tts3/path.sh @@ 
-0,0 +1 @@ +../../csmsc/tts3/path.sh \ No newline at end of file diff --git a/paddlespeech/t2s/datasets/get_feats.py b/paddlespeech/t2s/datasets/get_feats.py index 21458f152..a90f1a417 100644 --- a/paddlespeech/t2s/datasets/get_feats.py +++ b/paddlespeech/t2s/datasets/get_feats.py @@ -102,7 +102,7 @@ class Pitch(): def _convert_to_continuous_f0(self, f0: np.ndarray) -> np.ndarray: if (f0 == 0).all(): - print("All frames seems to be unvoiced.") + print("All frames seems to be unvoiced, this utt will be removed.") return f0 # padding start and end of f0 sequence diff --git a/paddlespeech/t2s/exps/fastspeech2/preprocess.py b/paddlespeech/t2s/exps/fastspeech2/preprocess.py index 04f79b11a..521b9a880 100644 --- a/paddlespeech/t2s/exps/fastspeech2/preprocess.py +++ b/paddlespeech/t2s/exps/fastspeech2/preprocess.py @@ -109,6 +109,8 @@ def process_sentence(config: Dict[str, Any], np.save(mel_path, logmel) # extract pitch and energy f0 = pitch_extractor.get_pitch(wav, duration=np.array(durations)) + if (f0 == 0).all(): + return None assert f0.shape[0] == len(durations) f0_dir = output_dir / "data_pitch" f0_dir.mkdir(parents=True, exist_ok=True)