diff --git a/README.md b/README.md
index f17cec13..59c61f77 100644
--- a/README.md
+++ b/README.md
@@ -888,7 +888,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
 </p>
 
 ## Acknowledgement
-- Many thanks to [HighCWu](https://github.com/HighCWu)for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
+- Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
 - Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data. 
 - Many thanks to [BarryKCL](https://github.com/BarryKCL) improved TTS Chinses frontend based on [G2PW](https://github.com/GitYCC/g2pW).
 - Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
diff --git a/examples/aishell3/ernie_sat/local/synthesize_e2e.sh b/examples/aishell3/ernie_sat/local/synthesize_e2e.sh
index b33e8ca0..77b353b5 100755
--- a/examples/aishell3/ernie_sat/local/synthesize_e2e.sh
+++ b/examples/aishell3/ernie_sat/local/synthesize_e2e.sh
@@ -13,9 +13,9 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     FLAGS_fraction_of_gpu_memory_to_use=0.01 \
     python3 ${BIN_DIR}/synthesize_e2e.py \
         --task_name=synthesize \
-        --wav_path=source/SSB03540307.wav\
-        --old_str='请播放歌曲小苹果。' \
-        --new_str='歌曲真好听。' \
+        --wav_path=source/SSB03540307.wav \
+        --old_str='请播放歌曲小苹果' \
+        --new_str='歌曲真好听' \
         --source_lang=zh \
         --target_lang=zh \
         --erniesat_config=${config_path} \
diff --git a/examples/aishell3_vctk/ernie_sat/README.md b/examples/aishell3_vctk/ernie_sat/README.md
index 777bea32..a849488d 100644
--- a/examples/aishell3_vctk/ernie_sat/README.md
+++ b/examples/aishell3_vctk/ernie_sat/README.md
@@ -29,9 +29,11 @@ Or train your MFA model reference to [mfa example](https://github.com/PaddlePadd
 Assume the paths to the datasets are:
 - `~/datasets/data_aishell3` 
 - `~/datasets/VCTK-Corpus-0.92`
+
 Assume the path to the MFA results of the datasets are:
 - `./aishell3_alignment_tone`
 - `./vctk_alignment`
+
 Run the command below to
 1. **source path**.
 2. preprocess the dataset.
diff --git a/examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh b/examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
index c30af6e8..446ac879 100755
--- a/examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
+++ b/examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
@@ -15,7 +15,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     python3 ${BIN_DIR}/synthesize_e2e.py \
         --task_name=synthesize \
         --wav_path=source/p243_313.wav \
-        --old_str='For that reason cover should not be given.' \
+        --old_str='For that reason cover should not be given' \
         --new_str='今天天气很好' \
         --source_lang=en \
         --target_lang=zh \
@@ -36,8 +36,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     python3 ${BIN_DIR}/synthesize_e2e.py \
         --task_name=synthesize \
         --wav_path=source/SSB03540307.wav \
-        --old_str='请播放歌曲小苹果。' \
-        --new_str="Thank you!" \
+        --old_str='请播放歌曲小苹果' \
+        --new_str="Thank you" \
         --source_lang=zh \
         --target_lang=en \
         --erniesat_config=${config_path} \
diff --git a/examples/vctk/ernie_sat/local/synthesize_e2e.sh b/examples/vctk/ernie_sat/local/synthesize_e2e.sh
index fee54016..dcc71044 100755
--- a/examples/vctk/ernie_sat/local/synthesize_e2e.sh
+++ b/examples/vctk/ernie_sat/local/synthesize_e2e.sh
@@ -14,7 +14,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     python3 ${BIN_DIR}/synthesize_e2e.py \
         --task_name=synthesize \
         --wav_path=source/p243_313.wav \
-        --old_str='For that reason cover should not be given.' \
+        --old_str='For that reason cover should not be given' \
         --new_str='I love you very much do you love me' \
         --source_lang=en \
         --target_lang=en \
@@ -36,8 +36,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     python3 ${BIN_DIR}/synthesize_e2e.py \
         --task_name=edit \
         --wav_path=source/p243_313.wav \
-        --old_str='For that reason cover should not be given.' \
-        --new_str='For that reason cover is not impossible to be given.' \
+        --old_str='For that reason cover should not be given' \
+        --new_str='For that reason cover is not impossible to be given' \
         --source_lang=en \
         --target_lang=en \
         --erniesat_config=${config_path} \
diff --git a/examples/voxceleb/sv0/README.md b/examples/voxceleb/sv0/README.md
index 26c95aca..7fe759eb 100644
--- a/examples/voxceleb/sv0/README.md
+++ b/examples/voxceleb/sv0/README.md
@@ -148,4 +148,4 @@ source path.sh
 
 CUDA_VISIBLE_DEVICES= bash ./local/test.sh ./data sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1/model/ conf/ecapa_tdnn.yaml
 ```
-The performance of the released models are shown in [this](./RESULTS.md)
+The performance of the released models is shown in [this](./RESULT.md)
diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md
index cc209db7..af84a5f6 100644
--- a/examples/wenetspeech/asr1/RESULTS.md
+++ b/examples/wenetspeech/asr1/RESULTS.md
@@ -34,3 +34,15 @@ Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wen
 | conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | - | 0.052534 |  
 | conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | - | 0.052915 |  
 | conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention_rescoring | - | 0.047904 |  
+
+
+## Conformer Streaming Pretrained Model
+
+Pretrain model from https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | 16 | 0.056273 |  
+| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | 16 | 0.078918 |  
+| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | 16 | 0.079080 |  
+| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | 16 | 0.054401 |
diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index 813e1e52..8a984949 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -605,8 +605,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
             xs: paddle.Tensor,
             offset: int,
             required_cache_size: int,
-            att_cache: paddle.Tensor,  # paddle.zeros([0, 0, 0, 0])
-            cnn_cache: paddle.Tensor,  # paddle.zeros([0, 0, 0, 0])
+            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
+            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
     ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
         """ Export interface for c++ call, give input chunk xs, and return
             output from time 0 to current chunk.
diff --git a/paddlespeech/s2t/modules/attention.py b/paddlespeech/s2t/modules/attention.py
index 92990048..2d236743 100644
--- a/paddlespeech/s2t/modules/attention.py
+++ b/paddlespeech/s2t/modules/attention.py
@@ -86,7 +86,7 @@ class MultiHeadedAttention(nn.Layer):
             self,
             value: paddle.Tensor,
             scores: paddle.Tensor,
-            mask: paddle.Tensor,  # paddle.ones([0, 0, 0], dtype=paddle.bool)
+            mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool)
     ) -> paddle.Tensor:
         """Compute attention context vector.
         Args:
@@ -127,15 +127,14 @@ class MultiHeadedAttention(nn.Layer):
 
         return self.linear_out(x)  # (batch, time1, d_model)
 
-    def forward(
-            self,
-            query: paddle.Tensor,
-            key: paddle.Tensor,
-            value: paddle.Tensor,
-            mask: paddle.Tensor,  # paddle.ones([0,0,0], dtype=paddle.bool)
-            pos_emb: paddle.Tensor,  # paddle.empty([0])
-            cache: paddle.Tensor  # paddle.zeros([0,0,0,0])
-    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
+    def forward(self,
+                query: paddle.Tensor,
+                key: paddle.Tensor,
+                value: paddle.Tensor,
+                mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
+                pos_emb: paddle.Tensor=paddle.empty([0]),
+                cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
+                ) -> Tuple[paddle.Tensor, paddle.Tensor]:
         """Compute scaled dot product attention.
        Args:
             query (paddle.Tensor): Query tensor (#batch, time1, size).
@@ -244,15 +243,14 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention):
 
         return x
 
-    def forward(
-            self,
-            query: paddle.Tensor,
-            key: paddle.Tensor,
-            value: paddle.Tensor,
-            mask: paddle.Tensor,  # paddle.ones([0,0,0], dtype=paddle.bool)
-            pos_emb: paddle.Tensor,  # paddle.empty([0])
-            cache: paddle.Tensor  # paddle.zeros([0,0,0,0])
-    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
+    def forward(self,
+                query: paddle.Tensor,
+                key: paddle.Tensor,
+                value: paddle.Tensor,
+                mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
+                pos_emb: paddle.Tensor=paddle.empty([0]),
+                cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
+                ) -> Tuple[paddle.Tensor, paddle.Tensor]:
         """Compute 'Scaled Dot Product Attention' with rel. positional encoding.
         Args:
             query (paddle.Tensor): Query tensor (#batch, time1, size).
diff --git a/paddlespeech/s2t/modules/conformer_convolution.py b/paddlespeech/s2t/modules/conformer_convolution.py
index b35fea5b..be605654 100644
--- a/paddlespeech/s2t/modules/conformer_convolution.py
+++ b/paddlespeech/s2t/modules/conformer_convolution.py
@@ -108,8 +108,8 @@ class ConvolutionModule(nn.Layer):
     def forward(
             self,
             x: paddle.Tensor,
-            mask_pad: paddle.Tensor,  # paddle.ones([0,0,0], dtype=paddle.bool)
-            cache: paddle.Tensor  # paddle.zeros([0,0,0,0])
+            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
+            cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
     ) -> Tuple[paddle.Tensor, paddle.Tensor]:
         """Compute convolution module.
         Args:
diff --git a/paddlespeech/s2t/modules/decoder_layer.py b/paddlespeech/s2t/modules/decoder_layer.py
index c8843b72..37b124e8 100644
--- a/paddlespeech/s2t/modules/decoder_layer.py
+++ b/paddlespeech/s2t/modules/decoder_layer.py
@@ -121,16 +121,11 @@ class DecoderLayer(nn.Layer):
 
         if self.concat_after:
             tgt_concat = paddle.cat(
-                (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask,
-                                       paddle.empty([0]),
-                                       paddle.zeros([0, 0, 0, 0]))[0]),
-                dim=-1)
+                (tgt_q, self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]), dim=-1)
             x = residual + self.concat_linear1(tgt_concat)
         else:
             x = residual + self.dropout(
-                self.self_attn(tgt_q, tgt, tgt, tgt_q_mask,
-                               paddle.empty([0]), paddle.zeros([0, 0, 0, 0]))[
-                                   0])
+                self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0])
         if not self.normalize_before:
             x = self.norm1(x)
 
@@ -139,15 +134,11 @@ class DecoderLayer(nn.Layer):
             x = self.norm2(x)
         if self.concat_after:
             x_concat = paddle.cat(
-                (x, self.src_attn(x, memory, memory, memory_mask,
-                                  paddle.empty([0]),
-                                  paddle.zeros([0, 0, 0, 0]))[0]),
-                dim=-1)
+                (x, self.src_attn(x, memory, memory, memory_mask)[0]), dim=-1)
             x = residual + self.concat_linear2(x_concat)
         else:
             x = residual + self.dropout(
-                self.src_attn(x, memory, memory, memory_mask,
-                              paddle.empty([0]), paddle.zeros([0, 0, 0, 0]))[0])
+                self.src_attn(x, memory, memory, memory_mask)[0])
         if not self.normalize_before:
             x = self.norm2(x)
 
diff --git a/paddlespeech/s2t/modules/encoder.py b/paddlespeech/s2t/modules/encoder.py
index cf4e32fa..2f4ad1b2 100644
--- a/paddlespeech/s2t/modules/encoder.py
+++ b/paddlespeech/s2t/modules/encoder.py
@@ -175,9 +175,7 @@ class BaseEncoder(nn.Layer):
             decoding_chunk_size, self.static_chunk_size,
             num_decoding_left_chunks)
         for layer in self.encoders:
-            xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad,
-                                          paddle.zeros([0, 0, 0, 0]),
-                                          paddle.zeros([0, 0, 0, 0]))
+            xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad)
         if self.normalize_before:
             xs = self.after_norm(xs)
         # Here we assume the mask is not changed in encoder layers, so just
@@ -190,9 +188,9 @@ class BaseEncoder(nn.Layer):
             xs: paddle.Tensor,
             offset: int,
             required_cache_size: int,
-            att_cache: paddle.Tensor,  # paddle.zeros([0,0,0,0])
-            cnn_cache: paddle.Tensor,  # paddle.zeros([0,0,0,0]),
-            att_mask: paddle.Tensor,  # paddle.ones([0,0,0], dtype=paddle.bool)
+            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
+            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
+            att_mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool)
     ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
         """ Forward just one chunk
         Args:
@@ -255,7 +253,6 @@ class BaseEncoder(nn.Layer):
                 xs,
                 att_mask,
                 pos_emb,
-                mask_pad=paddle.ones([0, 0, 0], dtype=paddle.bool),
                 att_cache=att_cache[i:i + 1] if elayers > 0 else att_cache,
                 cnn_cache=cnn_cache[i:i + 1]
                 if paddle.shape(cnn_cache)[0] > 0 else cnn_cache, )
@@ -328,8 +325,7 @@ class BaseEncoder(nn.Layer):
             chunk_xs = xs[:, cur:end, :]
 
             (y, att_cache, cnn_cache) = self.forward_chunk(
-                chunk_xs, offset, required_cache_size, att_cache, cnn_cache,
-                paddle.ones([0, 0, 0], dtype=paddle.bool))
+                chunk_xs, offset, required_cache_size, att_cache, cnn_cache)
 
             outputs.append(y)
             offset += y.shape[1]
diff --git a/paddlespeech/s2t/modules/encoder_layer.py b/paddlespeech/s2t/modules/encoder_layer.py
index 4555b535..dac62bce 100644
--- a/paddlespeech/s2t/modules/encoder_layer.py
+++ b/paddlespeech/s2t/modules/encoder_layer.py
@@ -76,10 +76,9 @@ class TransformerEncoderLayer(nn.Layer):
             x: paddle.Tensor,
             mask: paddle.Tensor,
             pos_emb: paddle.Tensor,
-            mask_pad: paddle.
-            Tensor,  # paddle.ones([0, 0, 0], dtype=paddle.bool)
-            att_cache: paddle.Tensor,  # paddle.zeros([0, 0, 0, 0])
-            cnn_cache: paddle.Tensor,  # paddle.zeros([0, 0, 0, 0])
+            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
+            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
+            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
     ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
         """Compute encoded features.
         Args:
@@ -106,8 +105,7 @@ class TransformerEncoderLayer(nn.Layer):
         if self.normalize_before:
             x = self.norm1(x)
 
-        x_att, new_att_cache = self.self_attn(
-            x, x, x, mask, paddle.empty([0]), cache=att_cache)
+        x_att, new_att_cache = self.self_attn(x, x, x, mask, cache=att_cache)
 
         if self.concat_after:
             x_concat = paddle.concat((x, x_att), axis=-1)
@@ -195,9 +193,9 @@ class ConformerEncoderLayer(nn.Layer):
             x: paddle.Tensor,
             mask: paddle.Tensor,
             pos_emb: paddle.Tensor,
-            mask_pad: paddle.Tensor,  #paddle.ones([0, 0, 0],dtype=paddle.bool)
-            att_cache: paddle.Tensor,  # paddle.zeros([0, 0, 0, 0])
-            cnn_cache: paddle.Tensor,  # paddle.zeros([0, 0, 0, 0])
+            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
+            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
+            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
     ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
         """Compute encoded features.
         Args:
diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py
index a7eb9892..4a69d78a 100644
--- a/paddlespeech/s2t/training/trainer.py
+++ b/paddlespeech/s2t/training/trainer.py
@@ -19,6 +19,10 @@ from pathlib import Path
 
 import paddle
 from paddle import distributed as dist
+world_size = dist.get_world_size()
+if world_size > 1:
+    dist.init_parallel_env()
+
 from visualdl import LogWriter
 
 from paddlespeech.s2t.training.reporter import ObsScope
@@ -122,9 +126,6 @@ class Trainer():
         else:
             raise Exception("invalid device")
 
-        if self.parallel:
-            self.init_parallel()
-
         self.checkpoint = Checkpoint(
             kbest_n=self.config.checkpoint.kbest_n,
             latest_n=self.config.checkpoint.latest_n)
@@ -173,11 +174,6 @@ class Trainer():
         """
         return self.args.ngpu > 1
 
-    def init_parallel(self):
-        """Init environment for multiprocess training.
-        """
-        dist.init_parallel_env()
-
     @mp_tools.rank_zero_only
     def save(self, tag=None, infos: dict=None):
         """Save checkpoint (model parameters and optimizer states).
diff --git a/paddlespeech/server/engine/asr/online/python/asr_engine.py b/paddlespeech/server/engine/asr/online/python/asr_engine.py
index 87d88ee6..5782d703 100644
--- a/paddlespeech/server/engine/asr/online/python/asr_engine.py
+++ b/paddlespeech/server/engine/asr/online/python/asr_engine.py
@@ -480,8 +480,7 @@ class PaddleASRConnectionHanddler:
                  self.offset,
                  required_cache_size,
                  att_cache=self.att_cache,
-                 cnn_cache=self.cnn_cache,
-                 att_mask=paddle.ones([0, 0, 0], dtype=paddle.bool))
+                 cnn_cache=self.cnn_cache)
             outputs.append(y)
 
             # update the global offset, in decoding frame unit
diff --git a/paddlespeech/t2s/exps/ernie_sat/align.py b/paddlespeech/t2s/exps/ernie_sat/align.py
index 464f51a3..8dbe685f 100755
--- a/paddlespeech/t2s/exps/ernie_sat/align.py
+++ b/paddlespeech/t2s/exps/ernie_sat/align.py
@@ -58,7 +58,7 @@ def _readtg(tg_path: str, lang: str='en', fs: int=24000, n_shift: int=300):
         durations[-2] += durations[-1]
         durations = durations[:-1]
 
-    # replace ' and 'sil' with 'sp'
+    # replace '' and 'sil' with 'sp'
     phones = ['sp' if (phn == '' or phn == 'sil') else phn for phn in phones]
 
     if lang == 'en':
@@ -195,7 +195,7 @@ def words2phns(text: str, lang='en'):
             wrd = wrd.upper()
         if (wrd not in ds):
             wrd2phns[str(index) + '_' + wrd] = 'spn'
-            phns.extend('spn')
+            phns.extend(['spn'])
         else:
             wrd2phns[str(index) + '_' + wrd] = word2phns_dict[wrd].split()
             phns.extend(word2phns_dict[wrd].split())
diff --git a/paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py b/paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
index 21c9ae04..e450aa1a 100644
--- a/paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
@@ -137,9 +137,6 @@ def prep_feats_with_dur(wav_path: str,
     new_wav = np.concatenate(
         [wav_org[:wav_left_idx], blank_wav, wav_org[wav_right_idx:]])
 
-    # 音频是正常遮住了
-    sf.write(str("mask_wav.wav"), new_wav, samplerate=fs)
-
     # 4. get old and new mel span to be mask
     old_span_bdy = get_span_bdy(
         mfa_start=mfa_start, mfa_end=mfa_end, span_to_repl=span_to_repl)
@@ -274,7 +271,8 @@ def get_wav(wav_path: str,
             new_str: str='',
             duration_adjust: bool=True,
             fs: int=24000,
-            n_shift: int=300):
+            n_shift: int=300,
+            task_name: str='synthesize'):
 
     outs = get_mlm_output(
         wav_path=wav_path,
@@ -298,9 +296,11 @@ def get_wav(wav_path: str,
     alt_wav = np.squeeze(alt_wav)
 
     old_time_bdy = [n_shift * x for x in old_span_bdy]
-    wav_replaced = np.concatenate(
-        [wav_org[:old_time_bdy[0]], alt_wav, wav_org[old_time_bdy[1]:]])
-
+    if task_name == 'edit':
+        wav_replaced = np.concatenate(
+            [wav_org[:old_time_bdy[0]], alt_wav, wav_org[old_time_bdy[1]:]])
+    else:
+        wav_replaced = alt_wav
     wav_dict = {"origin": wav_org, "output": wav_replaced}
     return wav_dict
 
@@ -356,7 +356,11 @@ def parse_args():
         "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
 
     # ernie sat related
-    parser.add_argument("--task_name", type=str, help="task name")
+    parser.add_argument(
+        "--task_name",
+        type=str,
+        choices=['edit', 'synthesize'],
+        help="task name.")
     parser.add_argument("--wav_path", type=str, help="path of old wav")
     parser.add_argument("--old_str", type=str, help="old string")
     parser.add_argument("--new_str", type=str, help="new string")
@@ -410,10 +414,9 @@ if __name__ == '__main__':
     if args.task_name == 'edit':
         new_str = new_str
     elif args.task_name == 'synthesize':
-        new_str = old_str + new_str
+        new_str = old_str + ' ' + new_str
     else:
-        new_str = old_str + new_str
-    print("new_str:", new_str)
+        new_str = old_str + ' ' + new_str
 
     # Extractor
     mel_extractor = LogMelFBank(
@@ -467,7 +470,8 @@ if __name__ == '__main__':
         new_str=new_str,
         duration_adjust=args.duration_adjust,
         fs=erniesat_config.fs,
-        n_shift=erniesat_config.n_shift)
+        n_shift=erniesat_config.n_shift,
+        task_name=args.task_name)
 
     sf.write(
         args.output_name, wav_dict['output'], samplerate=erniesat_config.fs)
diff --git a/tests/test_tipc/prepare.sh b/tests/test_tipc/prepare.sh
old mode 100644
new mode 100755
index 2a227281..cb05a1d0
--- a/tests/test_tipc/prepare.sh
+++ b/tests/test_tipc/prepare.sh
@@ -15,6 +15,7 @@ dataline=$(cat ${FILENAME})
 # parser params
 IFS=$'\n'
 lines=(${dataline})
+python=python
 
 # The training params
 model_name=$(func_parser_value "${lines[1]}")
@@ -68,7 +69,7 @@ if [[ ${MODE} = "benchmark_train" ]];then
 
     if [[ ${model_name} == "pwgan" ]]; then
         # 下载 csmsc 数据集并解压缩
-        wget -nc https://weixinxcxdb.oss-cn-beijing.aliyuncs.com/gwYinPinKu/BZNSYP.rar
+        wget -nc https://paddle-wheel.bj.bcebos.com/benchmark/BZNSYP.rar
         mkdir -p BZNSYP
         unrar x BZNSYP.rar BZNSYP
         wget -nc https://paddlespeech.bj.bcebos.com/Parakeet/benchmark/durations.txt
@@ -80,6 +81,10 @@ if [[ ${MODE} = "benchmark_train" ]];then
         python ../paddlespeech/t2s/exps/gan_vocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy
     fi
 
+    echo "barrier start"
+    PYTHON="${python}" bash test_tipc/barrier.sh
+    echo "barrier end"
+
     if [[ ${model_name} == "mdtc" ]]; then
         # 下载 Snips 数据集并解压缩
         wget https://paddlespeech.bj.bcebos.com/datasets/hey_snips_kws_4.0.tar.gz.1