diff --git a/examples/aishell/asr1/run.sh b/examples/aishell/asr1/run.sh
index bd4f50e3f..aa45af647 100644
--- a/examples/aishell/asr1/run.sh
+++ b/examples/aishell/asr1/run.sh
@@ -24,12 +24,12 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips} || exit -1
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
-    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
+    avg.sh best exp/${ckpt}/checkpoints ${avg_num} || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
diff --git a/examples/aishell3/tts3/README.md b/examples/aishell3/tts3/README.md
index ee501fe2d..32d99ce47 100644
--- a/examples/aishell3/tts3/README.md
+++ b/examples/aishell3/tts3/README.md
@@ -109,9 +109,9 @@ pwg_aishell3_ckpt_0.5
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` for `pwgan` or `1` for `hifigan` (it defaults to `0` when omitted).
 ```text
 usage: synthesize.py [-h]
                      [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
@@ -158,9 +158,9 @@ optional arguments:
 ```
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` for `pwgan` or `1` for `hifigan` (it defaults to `0` when omitted).
 ```text
 usage: synthesize_e2e.py [-h]
                          [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
diff --git a/examples/aishell3/tts3/local/synthesize.sh b/examples/aishell3/tts3/local/synthesize.sh
index 9134e0426..aec9af03b 100755
--- a/examples/aishell3/tts3/local/synthesize.sh
+++ b/examples/aishell3/tts3/local/synthesize.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/aishell3/tts3/local/synthesize_e2e.sh b/examples/aishell3/tts3/local/synthesize_e2e.sh
index 2cc22ede2..807714c9a 100755
--- a/examples/aishell3/tts3/local/synthesize_e2e.sh
+++ b/examples/aishell3/tts3/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/aishell3/tts3/run.sh b/examples/aishell3/tts3/run.sh
index 3fd5d73c6..265f9054e 100755
--- a/examples/aishell3/tts3/run.sh
+++ b/examples/aishell3/tts3/run.sh
@@ -27,13 +27,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize: the last argument selects the vocoder, 0 = pwgan (default), 1 = hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: the last argument selects the vocoder, 0 = pwgan (default), 1 = hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
diff --git a/examples/aishell3_vctk/ernie_sat/run.sh b/examples/aishell3_vctk/ernie_sat/run.sh
index d29f0b6e8..809a90e29 100755
--- a/examples/aishell3_vctk/ernie_sat/run.sh
+++ b/examples/aishell3_vctk/ernie_sat/run.sh
@@ -32,6 +32,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, default speech synthesis from Chinese to English, use stage1 to switch from English to Chinese
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e, run both speech synthesis from Chinese to English and English to Chinese
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
diff --git a/examples/canton/tts3/local/synthesize_e2e.sh b/examples/canton/tts3/local/synthesize_e2e.sh
index 38b7e1af0..5d21aa9f0 100755
--- a/examples/canton/tts3/local/synthesize_e2e.sh
+++ b/examples/canton/tts3/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/canton/tts3/run.sh b/examples/canton/tts3/run.sh
index 0e1f52a1c..297091a7c 100755
--- a/examples/canton/tts3/run.sh
+++ b/examples/canton/tts3/run.sh
@@ -28,13 +28,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize: the last argument selects the vocoder, 0 = pwgan (default), 1 = hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: the last argument selects the vocoder, 0 = pwgan (default), 1 = hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
diff --git a/examples/csmsc/tts0/README.md b/examples/csmsc/tts0/README.md
index 6269b419a..0babea7aa 100644
--- a/examples/csmsc/tts0/README.md
+++ b/examples/csmsc/tts0/README.md
@@ -99,9 +99,9 @@ pwg_baker_ckpt_0.4
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can use stage `0-4` to select the vocoder to use {`pwgan`, `multi band melgan`, `style melgan`, ` hifigan`, `wavernn`}
+The last argument selects the vocoder used during synthesis: use `0-4` to choose, in order, from {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`} (it defaults to `0` when omitted).
 
 ```text
 usage: synthesize.py [-h]
@@ -150,9 +150,9 @@ optional arguments:
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can use stage `0,1,3,4` to select the vocoder to use{`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}
+The last argument selects the vocoder used during synthesis: use `0,1,3,4` to choose, in order, from {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`} (it defaults to `0` when omitted).
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/csmsc/tts0/local/synthesize.sh b/examples/csmsc/tts0/local/synthesize.sh
index 5b8ed15e5..18c775ac1 100755
--- a/examples/csmsc/tts0/local/synthesize.sh
+++ b/examples/csmsc/tts0/local/synthesize.sh
@@ -3,8 +3,8 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # for more GAN Vocoders
@@ -40,7 +40,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # style melgan
@@ -58,7 +58,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # hifigan
@@ -77,7 +77,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # wavernn
@@ -96,5 +96,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
diff --git a/examples/csmsc/tts0/local/synthesize_e2e.sh b/examples/csmsc/tts0/local/synthesize_e2e.sh
index 40b49aa1e..24b60927d 100755
--- a/examples/csmsc/tts0/local/synthesize_e2e.sh
+++ b/examples/csmsc/tts0/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # TODO: tacotron2 动转静的结果没有动态图的响亮, 可能还是 decode 的时候某个函数动静不对齐
 # pwgan
@@ -25,7 +25,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
         
 fi
 
@@ -47,7 +47,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 
 # the pretrained models haven't release now
@@ -68,7 +68,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --lang=zh \
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
         # --inference_dir=${train_output_path}/inference
 fi
 
@@ -90,7 +90,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 
 # wavernn
@@ -111,5 +111,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
diff --git a/examples/csmsc/tts0/run.sh b/examples/csmsc/tts0/run.sh
index 83bb02a5f..c3a7c7aa2 100755
--- a/examples/csmsc/tts0/run.sh
+++ b/examples/csmsc/tts0/run.sh
@@ -27,15 +27,15 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0
-    # stage 1-4 to select the vocoder to use {multi band melgan, style melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1-4 for {multi band melgan, style melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0
-    # stage 1,3,4 to select the vocoder to use {multi band melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1, 3 or 4 for {multi band melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
diff --git a/examples/csmsc/tts2/README.md b/examples/csmsc/tts2/README.md
index 3c6e7d96c..1b40ded27 100644
--- a/examples/csmsc/tts2/README.md
+++ b/examples/csmsc/tts2/README.md
@@ -116,9 +116,9 @@ pwg_baker_ckpt_0.4
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can use stage `0-4` to select the vocoder to use {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`}
+The last argument selects the vocoder used during synthesis: use `0-4` to choose, in order, from {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`} (it defaults to `0` when omitted).
 
 ```text
 usage: synthesize.py [-h]
@@ -166,9 +166,9 @@ optional arguments:
 ```
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can use stage `0,1,3,4` to select the vocoder to use {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}
+The last argument selects the vocoder used during synthesis: use `0,1,3,4` to choose, in order, from {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`} (it defaults to `0` when omitted).
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/csmsc/tts2/local/synthesize.sh b/examples/csmsc/tts2/local/synthesize.sh
index b8982a16d..2489942bd 100755
--- a/examples/csmsc/tts2/local/synthesize.sh
+++ b/examples/csmsc/tts2/local/synthesize.sh
@@ -3,8 +3,8 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/csmsc/tts2/local/synthesize_e2e.sh b/examples/csmsc/tts2/local/synthesize_e2e.sh
index 2b2787295..ec994644a 100755
--- a/examples/csmsc/tts2/local/synthesize_e2e.sh
+++ b/examples/csmsc/tts2/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/csmsc/tts2/run.sh b/examples/csmsc/tts2/run.sh
index 6f62bc95b..b19e7bbd2 100755
--- a/examples/csmsc/tts2/run.sh
+++ b/examples/csmsc/tts2/run.sh
@@ -27,15 +27,15 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0
-    # use stage 1-4 to select the vocoder to use {multi band melgan, style melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1-4 for {multi band melgan, style melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0
-    # use stage 1,3,4 to select the vocoder to use {multi band melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1, 3 or 4 for {multi band melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
diff --git a/examples/csmsc/tts3/README.md b/examples/csmsc/tts3/README.md
index 9d349426b..1cdf18285 100644
--- a/examples/csmsc/tts3/README.md
+++ b/examples/csmsc/tts3/README.md
@@ -107,9 +107,9 @@ pwg_baker_ckpt_0.4
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis. The parameter values range from `0-4`, corresponding to the following five vocoder models: `pwgan`, `multi band melgan`, `style melgan`, `hifigan`, and `wavernn`.
+The last argument selects the vocoder used during synthesis: use `0-4` to choose, in order, from {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`} (it defaults to `0` when omitted).
 
 ```text
 usage: synthesize.py [-h]
@@ -157,9 +157,9 @@ optional arguments:
 ```
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis. The parameter values are {`0,1,3,4`}, corresponding to the following four vocoder models: `pwgan`, `multi band melgan`, `hifigan`, and `wavernn`.
+The last argument selects the vocoder used during synthesis: use `0,1,3,4` to choose, in order, from {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`} (it defaults to `0` when omitted).
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/csmsc/tts3/README_cn.md b/examples/csmsc/tts3/README_cn.md
index 399c15d55..b8b575dc7 100644
--- a/examples/csmsc/tts3/README_cn.md
+++ b/examples/csmsc/tts3/README_cn.md
@@ -113,9 +113,9 @@ pwg_baker_ckpt_0.4
 `./local/synthesize.sh` 调用 `${BIN_DIR}/../synthesize.py` 即可从 `metadata.jsonl`中合成波形。
 
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` 参数用于控制合成过程中使用的声码器模型。该参数的取值范围为 `0-4`，分别对应以下五种声码器模型：`pwgan`、`multi band melgan`、`style melgan`、`hifigan` 和 `wavernn`。
+最后一位参数 `0` 用于控制合成过程中使用的声码器模型。该参数的取值范围为 `0-4`，分别对应以下五种声码器模型：`pwgan`、`multi band melgan`、`style melgan`、`hifigan` 和 `wavernn`。
 
 ```text
 usage: synthesize.py [-h]
@@ -164,9 +164,9 @@ optional arguments:
 `./local/synthesize_e2e.sh` 调用 `${BIN_DIR}/../synthesize_e2e.py`，即可从文本文件中合成波形。
 
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` 参数用于控制合成过程中使用的声码器模型。该参数的取值范围为{ `0,1,3,4`}，分别对应以下四种声码器模型：`pwgan`、`multi band melgan`、`hifigan` 和 `wavernn`。
+最后一位参数 `0` 用于控制合成过程中使用的声码器模型。该参数的取值范围为 {`0,1,3,4`}，分别对应以下四种声码器模型：`pwgan`、`multi band melgan`、`hifigan` 和 `wavernn`。
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/csmsc/tts3/local/synthesize.sh b/examples/csmsc/tts3/local/synthesize.sh
index 043bb52f4..1d1e7c1d4 100755
--- a/examples/csmsc/tts3/local/synthesize.sh
+++ b/examples/csmsc/tts3/local/synthesize.sh
@@ -3,8 +3,9 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-stage=0
-stop_stage=0
+
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -21,7 +22,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # for more GAN Vocoders
@@ -40,7 +41,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # style melgan
@@ -58,7 +59,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # hifigan
@@ -77,7 +78,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
 
 # wavernn
@@ -96,5 +97,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
         --test_metadata=dump/test/norm/metadata.jsonl \
         --output_dir=${train_output_path}/test \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
 fi
diff --git a/examples/csmsc/tts3/local/synthesize_e2e.sh b/examples/csmsc/tts3/local/synthesize_e2e.sh
index 35a5598a8..b84ab9832 100755
--- a/examples/csmsc/tts3/local/synthesize_e2e.sh
+++ b/examples/csmsc/tts3/local/synthesize_e2e.sh
@@ -3,9 +3,8 @@
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
-
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -24,7 +23,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 
 # for more GAN Vocoders
@@ -45,7 +44,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 
 # the pretrained models haven't release now
@@ -66,7 +65,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --lang=zh \
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
-        --phones_dict=dump/phone_id_map.txt
+        --phones_dict=dump/phone_id_map.txt || exit -1
         # --inference_dir=${train_output_path}/inference
 fi
 
@@ -88,7 +87,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
 
 
@@ -110,5 +109,5 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         --text=${BIN_DIR}/../../assets/sentences.txt \
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
-        --inference_dir=${train_output_path}/inference
+        --inference_dir=${train_output_path}/inference || exit -1
 fi
diff --git a/examples/csmsc/tts3/run.sh b/examples/csmsc/tts3/run.sh
index 96fa84471..1269afa39 100755
--- a/examples/csmsc/tts3/run.sh
+++ b/examples/csmsc/tts3/run.sh
@@ -27,15 +27,15 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0
-    # use stage 1-4 to select the vocoder to use {multi band melgan, style melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1-4 for {multi band melgan, style melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0
-    # use stage 1,3,4 to select the vocoder to use {multi band melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1, 3 or 4 for {multi band melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
diff --git a/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh b/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh
index bf7229e13..39ced6985 100755
--- a/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh
+++ b/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/csmsc/tts3_rhy/run.sh b/examples/csmsc/tts3_rhy/run.sh
index 294ceded5..49d7aeeae 100755
--- a/examples/csmsc/tts3_rhy/run.sh
+++ b/examples/csmsc/tts3_rhy/run.sh
@@ -28,13 +28,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0
-    # use stage 1-4 to select the vocoder to use {multi band melgan, style melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1-4 for {multi band melgan, style melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0
-    # use stage 1,3,4 to select the vocoder to use {multi band melgan, hifigan, wavernn}
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: the last argument selects the vocoder; 0 (default) is pwgan
+    # use 1, 3 or 4 for {multi band melgan, hifigan, wavernn} respectively
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
diff --git a/examples/csmsc/voc1/local/synthesize.sh b/examples/csmsc/voc1/local/synthesize.sh
index 145557b3d..890d4e0fc 100755
--- a/examples/csmsc/voc1/local/synthesize.sh
+++ b/examples/csmsc/voc1/local/synthesize.sh
@@ -11,4 +11,4 @@ python3 ${BIN_DIR}/../synthesize.py \
     --checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
     --test-metadata=dump/test/norm/metadata.jsonl \
     --output-dir=${train_output_path}/test \
-    --generator-type=pwgan
+    --generator-type=pwgan || exit -1
diff --git a/examples/csmsc/voc1/local/synthesize_e2e.sh b/examples/csmsc/voc1/local/synthesize_e2e.sh
index 7e1a6e8b7..e217622f1 100644
--- a/examples/csmsc/voc1/local/synthesize_e2e.sh
+++ b/examples/csmsc/voc1/local/synthesize_e2e.sh
@@ -19,4 +19,4 @@ python3 ${BIN_DIR}/../../synthesize_e2e.py \
     --text=${BIN_DIR}/../../assets/sentences.txt \
     --output_dir=${train_output_path}/test_e2e \
     --phones_dict=dump/phone_id_map.txt \
-    --inference_dir=${train_output_path}/inference
\ No newline at end of file
+    --inference_dir=${train_output_path}/inference || exit -1
diff --git a/examples/ljspeech/tts3/README.md b/examples/ljspeech/tts3/README.md
index 12bd73777..90133e307 100644
--- a/examples/ljspeech/tts3/README.md
+++ b/examples/ljspeech/tts3/README.md
@@ -105,9 +105,9 @@ pwg_ljspeech_ckpt_0.5
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` uses `pwgan` and `1` uses `hifigan`.
 
 ```text
 usage: synthesize.py [-h]
@@ -155,9 +155,9 @@ optional arguments:
 ```
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` uses `pwgan` and `1` uses `hifigan`.
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/ljspeech/tts3/local/synthesize.sh b/examples/ljspeech/tts3/local/synthesize.sh
index 0733e96fa..32e9affbf 100755
--- a/examples/ljspeech/tts3/local/synthesize.sh
+++ b/examples/ljspeech/tts3/local/synthesize.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/ljspeech/tts3/local/synthesize_e2e.sh b/examples/ljspeech/tts3/local/synthesize_e2e.sh
index 3f2340808..da44c99df 100755
--- a/examples/ljspeech/tts3/local/synthesize_e2e.sh
+++ b/examples/ljspeech/tts3/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/ljspeech/tts3/run.sh b/examples/ljspeech/tts3/run.sh
index b02126e6e..5a089ae9b 100755
--- a/examples/ljspeech/tts3/run.sh
+++ b/examples/ljspeech/tts3/run.sh
@@ -27,13 +27,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize; the last argument selects the vocoder: 0 (default) is pwgan, 1 is hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e; the last argument selects the vocoder: 0 (default) is pwgan, 1 is hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
diff --git a/examples/opencpop/svs1/README.md b/examples/opencpop/svs1/README.md
index 534a741fd..afc83e82b 100644
--- a/examples/opencpop/svs1/README.md
+++ b/examples/opencpop/svs1/README.md
@@ -172,9 +172,9 @@ optional arguments:
 `local/pinyin_to_phone.txt` comes from the readme of the opencpop dataset, indicating the mapping from pinyin to phonemes in opencpop.
 
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` uses `pwgan` and `1` uses `hifigan`.
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/opencpop/svs1/README_cn.md b/examples/opencpop/svs1/README_cn.md
index e35967d71..35005aade 100644
--- a/examples/opencpop/svs1/README_cn.md
+++ b/examples/opencpop/svs1/README_cn.md
@@ -175,9 +175,9 @@ optional arguments:
 `local/pinyin_to_phone.txt`来源于opencpop数据集中的README，表示opencpop中拼音到音素的映射。
 
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` 用于选择合成时使用的声码器模型，取值为 `0` 或 `1`，分别对应使用 `pwgan` 或 `hifigan` 模型作为声码器。
+最后一位参数用于选择合成时使用的声码器模型，取值为 `0` 或 `1`，分别对应使用 `pwgan` 或 `hifigan` 模型作为声码器。
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/opencpop/svs1/local/synthesize_e2e.sh b/examples/opencpop/svs1/local/synthesize_e2e.sh
index e8d0cc45a..f3e2dc9f4 100755
--- a/examples/opencpop/svs1/local/synthesize_e2e.sh
+++ b/examples/opencpop/svs1/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/opencpop/svs1/run.sh b/examples/opencpop/svs1/run.sh
index 6c6688b2f..9f780a3da 100755
--- a/examples/opencpop/svs1/run.sh
+++ b/examples/opencpop/svs1/run.sh
@@ -32,6 +32,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e; the last argument selects the vocoder: 0 (default) is pwgan, 1 is hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
diff --git a/examples/vctk/ernie_sat/README.md b/examples/vctk/ernie_sat/README.md
index 3fe99172b..8efd94a6b 100644
--- a/examples/vctk/ernie_sat/README.md
+++ b/examples/vctk/ernie_sat/README.md
@@ -85,9 +85,8 @@ hifigan_vctk_ckpt_0.2.0
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` , use`hifigan` model as vocoder.
 
 ##  Speech Synthesis and Speech Editing
 
@@ -142,7 +141,6 @@ You can check the text of downloaded wavs in `source/README.md`.
 ```bash
 ./run.sh --stage 3 --stop-stage 3 --gpus 0
 ```
-`stage 3` of `run.sh` calls `local/synthesize_e2e.sh`, `stage 0` of it is **Speech Synthesis** and  `stage 1` of it is **Speech Editing**.
 
 You can modify `--wav_path`、`--old_str` and `--new_str` yourself, `--old_str` should be the text corresponding to the audio of  `--wav_path`, `--new_str` should be designed according to `--task_name`, both `--source_lang` and `--target_lang` should be `en` for model trained with VCTK dataset.
 ## Pretrained Model
diff --git a/examples/vctk/ernie_sat/run.sh b/examples/vctk/ernie_sat/run.sh
index c9bdfde79..628557847 100755
--- a/examples/vctk/ernie_sat/run.sh
+++ b/examples/vctk/ernie_sat/run.sh
@@ -28,10 +28,10 @@ fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # synthesize, vocoder is hifigan by default
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize, task_name is speech synthesize by default stage 0, stage 1 will use speech edit as taskname
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e: runs both speech synthesis and speech editing
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
diff --git a/examples/vctk/tts3/README.md b/examples/vctk/tts3/README.md
index a9d568ebf..72169c8f3 100644
--- a/examples/vctk/tts3/README.md
+++ b/examples/vctk/tts3/README.md
@@ -108,9 +108,9 @@ pwg_vctk_ckpt_0.1.1
 ```
 `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` uses `pwgan` and `1` uses `hifigan`.
 
 ```text
 usage: synthesize.py [-h]
@@ -158,9 +158,9 @@ optional arguments:
 ```
 `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
 ```bash
-CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name}
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
 ```
-`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
+The last argument selects the vocoder used during synthesis: `0` uses `pwgan` and `1` uses `hifigan`.
 
 ```text
 usage: synthesize_e2e.py [-h]
diff --git a/examples/vctk/tts3/local/synthesize.sh b/examples/vctk/tts3/local/synthesize.sh
index 87145959f..9c1302ace 100755
--- a/examples/vctk/tts3/local/synthesize.sh
+++ b/examples/vctk/tts3/local/synthesize.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/vctk/tts3/local/synthesize_e2e.sh b/examples/vctk/tts3/local/synthesize_e2e.sh
index 971c83853..09506edcd 100755
--- a/examples/vctk/tts3/local/synthesize_e2e.sh
+++ b/examples/vctk/tts3/local/synthesize_e2e.sh
@@ -4,8 +4,8 @@ config_path=$1
 train_output_path=$2
 ckpt_name=$3
 
-stage=0
-stop_stage=0
+stage=${4:-0}
+stop_stage=${4:-0}
 
 # pwgan
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
diff --git a/examples/vctk/tts3/run.sh b/examples/vctk/tts3/run.sh
index 8ce3b707d..d37068c22 100755
--- a/examples/vctk/tts3/run.sh
+++ b/examples/vctk/tts3/run.sh
@@ -27,13 +27,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize; the last argument selects the vocoder: 0 (default) is pwgan, 1 is hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default 0, stage 1 will use hifigan as vocoder
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e; the last argument selects the vocoder: 0 (default) is pwgan, 1 is hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then