diff --git a/demos/audio_searching/src/operations/load.py b/demos/audio_searching/src/operations/load.py
index 0d9edb78..d1ea0057 100644
--- a/demos/audio_searching/src/operations/load.py
+++ b/demos/audio_searching/src/operations/load.py
@@ -26,8 +26,9 @@ def get_audios(path):
     """
     supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
     return [
-        item for sublist in [[os.path.join(dir, file) for file in files]
-                             for dir, _, files in list(os.walk(path))]
+        item
+        for sublist in [[os.path.join(dir, file) for file in files]
+                        for dir, _, files in list(os.walk(path))]
         for item in sublist if os.path.splitext(item)[1] in supported_formats
     ]
 
diff --git a/examples/aishell/asr0/local/train.sh b/examples/aishell/asr0/local/train.sh
index 54c642b6..102c051c 100755
--- a/examples/aishell/asr0/local/train.sh
+++ b/examples/aishell/asr0/local/train.sh
@@ -20,12 +20,21 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --model_type ${model_type} \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--model_type ${model_type} \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/aishell/asr1/local/train.sh b/examples/aishell/asr1/local/train.sh
index 1c8593bd..5617f7ef 100755
--- a/examples/aishell/asr1/local/train.sh
+++ b/examples/aishell/asr1/local/train.sh
@@ -27,14 +27,25 @@ ckpt_name=$2
 
 mkdir -p exp
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
 --seed ${seed} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--profiler-options "${profiler_options}" \
+--benchmark-batch-size ${benchmark_batch_size} \
+--benchmark-max-step ${benchmark_max_step}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
+--seed ${seed} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --profiler-options "${profiler_options}" \
 --benchmark-batch-size ${benchmark_batch_size} \
 --benchmark-max-step ${benchmark_max_step}
+fi
 
 
 if [ ${seed} != 0  ]; then
diff --git a/examples/callcenter/asr1/local/train.sh b/examples/callcenter/asr1/local/train.sh
index 3e92fd16..03b4588e 100755
--- a/examples/callcenter/asr1/local/train.sh
+++ b/examples/callcenter/asr1/local/train.sh
@@ -21,11 +21,19 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/librispeech/asr0/local/train.sh b/examples/librispeech/asr0/local/train.sh
index 0479398f..50d1d192 100755
--- a/examples/librispeech/asr0/local/train.sh
+++ b/examples/librispeech/asr0/local/train.sh
@@ -20,12 +20,21 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --model_type ${model_type} \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--model_type ${model_type} \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/librispeech/asr1/local/train.sh b/examples/librispeech/asr1/local/train.sh
index 275d3a49..3860d85c 100755
--- a/examples/librispeech/asr1/local/train.sh
+++ b/examples/librispeech/asr1/local/train.sh
@@ -22,11 +22,19 @@ fi
 # export FLAGS_cudnn_exhaustive_search=true
 # export FLAGS_conv_workspace_size_limit=4000
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/librispeech/asr2/local/train.sh b/examples/librispeech/asr2/local/train.sh
index 898391f4..560424ea 100755
--- a/examples/librispeech/asr2/local/train.sh
+++ b/examples/librispeech/asr2/local/train.sh
@@ -19,12 +19,21 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
 --model-name u2_kaldi \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
+--model-name u2_kaldi \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/ted_en_zh/st0/local/train.sh b/examples/ted_en_zh/st0/local/train.sh
index e366376b..ad00653b 100755
--- a/examples/ted_en_zh/st0/local/train.sh
+++ b/examples/ted_en_zh/st0/local/train.sh
@@ -19,11 +19,19 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/ted_en_zh/st1/local/train.sh b/examples/ted_en_zh/st1/local/train.sh
index a8e4acaa..5da64e99 100755
--- a/examples/ted_en_zh/st1/local/train.sh
+++ b/examples/ted_en_zh/st1/local/train.sh
@@ -20,12 +20,21 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --checkpoint_path "${ckpt_path}" \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--checkpoint_path "${ckpt_path}" \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
@@ -36,4 +45,4 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-exit 0
\ No newline at end of file
+exit 0
diff --git a/examples/timit/asr1/local/train.sh b/examples/timit/asr1/local/train.sh
index 9b3fa177..66140758 100755
--- a/examples/timit/asr1/local/train.sh
+++ b/examples/timit/asr1/local/train.sh
@@ -19,11 +19,19 @@ if [ ${seed} != 0  ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/tiny/asr0/local/train.sh b/examples/tiny/asr0/local/train.sh
index a69b6ddb..9060be67 100755
--- a/examples/tiny/asr0/local/train.sh
+++ b/examples/tiny/asr0/local/train.sh
@@ -26,6 +26,7 @@ model_type=$3
 
 mkdir -p exp
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
@@ -33,6 +34,15 @@ python3 -u ${BIN_DIR}/train.py \
 --model_type ${model_type} \
 --profiler-options "${profiler_options}" \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--model_type ${model_type} \
+--profiler-options "${profiler_options}" \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0  ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/tiny/asr1/local/train.sh b/examples/tiny/asr1/local/train.sh
index 1c8593bd..5617f7ef 100755
--- a/examples/tiny/asr1/local/train.sh
+++ b/examples/tiny/asr1/local/train.sh
@@ -27,14 +27,25 @@ ckpt_name=$2
 
 mkdir -p exp
 
+if [ "${ngpu}" -eq 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
 --seed ${seed} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--profiler-options "${profiler_options}" \
+--benchmark-batch-size ${benchmark_batch_size} \
+--benchmark-max-step ${benchmark_max_step}
+else
+python3 -m paddle.distributed.launch --gpus="${CUDA_VISIBLE_DEVICES}" ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
+--seed ${seed} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --profiler-options "${profiler_options}" \
 --benchmark-batch-size ${benchmark_batch_size} \
 --benchmark-max-step ${benchmark_max_step}
+fi
 
 
 if [ ${seed} != 0  ]; then
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/train.py b/paddlespeech/s2t/exps/deepspeech2/bin/train.py
index 09e8662f..e2c68d4b 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/train.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/train.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Trainer for DeepSpeech2 model."""
-from paddle import distributed as dist
 from yacs.config import CfgNode
 
 from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Trainer as Trainer
@@ -27,10 +26,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.ngpu > 1:
-        dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)
-    else:
-        main_sp(config, args)
+    main_sp(config, args)
 
 
 if __name__ == "__main__":
diff --git a/paddlespeech/s2t/exps/u2/bin/train.py b/paddlespeech/s2t/exps/u2/bin/train.py
index 53c22328..dc3a87c1 100644
--- a/paddlespeech/s2t/exps/u2/bin/train.py
+++ b/paddlespeech/s2t/exps/u2/bin/train.py
@@ -15,7 +15,6 @@
 import cProfile
 import os
 
-from paddle import distributed as dist
 from yacs.config import CfgNode
 
 from paddlespeech.s2t.exps.u2.model import U2Trainer as Trainer
@@ -32,10 +31,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.ngpu > 1:
-        dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)
-    else:
-        main_sp(config, args)
+    main_sp(config, args)
 
 
 if __name__ == "__main__":
diff --git a/paddlespeech/s2t/exps/u2_kaldi/bin/train.py b/paddlespeech/s2t/exps/u2_kaldi/bin/train.py
index fcfc05a8..b11da715 100644
--- a/paddlespeech/s2t/exps/u2_kaldi/bin/train.py
+++ b/paddlespeech/s2t/exps/u2_kaldi/bin/train.py
@@ -15,7 +15,6 @@
 import cProfile
 import os
 
-from paddle import distributed as dist
 from yacs.config import CfgNode
 
 from paddlespeech.s2t.training.cli import default_argument_parser
@@ -36,10 +35,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.ngpu > 1:
-        dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)
-    else:
-        main_sp(config, args)
+    main_sp(config, args)
 
 
 if __name__ == "__main__":
diff --git a/paddlespeech/s2t/exps/u2_st/bin/train.py b/paddlespeech/s2t/exps/u2_st/bin/train.py
index 4dec9ec8..574942e5 100644
--- a/paddlespeech/s2t/exps/u2_st/bin/train.py
+++ b/paddlespeech/s2t/exps/u2_st/bin/train.py
@@ -15,7 +15,6 @@
 import cProfile
 import os
 
-from paddle import distributed as dist
 from yacs.config import CfgNode
 
 from paddlespeech.s2t.exps.u2_st.model import U2STTrainer as Trainer
@@ -30,10 +29,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.ngpu > 1:
-        dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)
-    else:
-        main_sp(config, args)
+    main_sp(config, args)
 
 
 if __name__ == "__main__":
diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py
index ac55af12..89752bb9 100644
--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
     """
     rng = np.random.RandomState(epoch)
     shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
     rng.shuffle(batch_indices)
     batch_indices = [item for batch in batch_indices for item in batch]
     assert clipped is False
diff --git a/paddlespeech/t2s/modules/transformer/repeat.py b/paddlespeech/t2s/modules/transformer/repeat.py
index 1e946adf..2073a78b 100644
--- a/paddlespeech/t2s/modules/transformer/repeat.py
+++ b/paddlespeech/t2s/modules/transformer/repeat.py
@@ -36,4 +36,4 @@ def repeat(N, fn):
     Returns:
         MultiSequential: Repeated model instance.
     """
-    return MultiSequential(* [fn(n) for n in range(N)])
+    return MultiSequential(*[fn(n) for n in range(N)])