diff --git a/examples/aishell/asr0/local/test_hub_ori b/examples/aishell/asr0/local/test_hub_ori
deleted file mode 100755
index ee1fb805..00000000
--- a/examples/aishell/asr0/local/test_hub_ori
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-if [ $# != 4 ];then
-    echo "usage: ${0} config_path ckpt_path_prefix model_type audio_file"
-    exit -1
-fi
-
-ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-echo "using $ngpu gpus..."
-
-config_path=$1
-ckpt_prefix=$2
-model_type=$3
-audio_file=$4
-
-mkdir -p data
-wget -nc https://paddlespeech.bj.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
-if [ $? -ne 0 ]; then
-   exit 1
-fi
-
-if [ ! -f ${audio_file} ]; then
-    echo "Plase input the right audio_file path"
-    exit 1
-fi
-
-# download language model
-bash local/download_lm_ch.sh
-if [ $? -ne 0 ]; then
-   exit 1
-fi
-
-python3 -u ${BIN_DIR}/test_hub.py \
---nproc ${ngpu} \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix} \
---model_type ${model_type} \
---audio_file ${audio_file}
-
-if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
-    exit 1
-fi
-
-
-exit 0
diff --git a/examples/other/1xt2x/src_deepspeech2x/bin/test.py b/examples/other/1xt2x/src_deepspeech2x/bin/test.py
index b404cce8..88a13fdc 100644
--- a/examples/other/1xt2x/src_deepspeech2x/bin/test.py
+++ b/examples/other/1xt2x/src_deepspeech2x/bin/test.py
@@ -15,7 +15,6 @@
 from src_deepspeech2x.test_model import DeepSpeech2Tester as Tester
 from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
 
@@ -42,7 +41,7 @@ if __name__ == "__main__":
     print("model_type:{}".format(args.model_type))
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults(args.model_type)
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py b/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
index 4c20ffcd..003b02e2 100644
--- a/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
+++ b/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
@@ -120,20 +120,6 @@ class DeepSpeech2Model(nn.Layer):
     :rtype: tuple of LayerOutput
     """
 
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        default = CfgNode(
-            dict(
-                num_conv_layers=2,  #Number of stacking convolution layers.
-                num_rnn_layers=3,  #Number of stacking RNN layers.
-                rnn_layer_size=1024,  #RNN layer size (number of RNN cells).
-                use_gru=True,  #Use gru if set True. Use simple rnn if set False.
-                share_rnn_weights=True  #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.
-            ))
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self,
                  feat_size,
                  dict_size,
diff --git a/examples/other/1xt2x/src_deepspeech2x/test_model.py b/examples/other/1xt2x/src_deepspeech2x/test_model.py
index 53a4e629..246fb107 100644
--- a/examples/other/1xt2x/src_deepspeech2x/test_model.py
+++ b/examples/other/1xt2x/src_deepspeech2x/test_model.py
@@ -44,22 +44,6 @@ logger = Log(__name__).getlog()
 
 
 class DeepSpeech2Trainer(Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # training config
-        default = CfgNode(
-            dict(
-                lr=5e-4,  # learning rate
-                lr_decay=1.0,  # learning rate decay
-                weight_decay=1e-6,  # the coeff of weight decay
-                global_grad_clip=5.0,  # the global norm clip
-                n_epoch=50,  # train epochs
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
 
@@ -246,27 +230,6 @@ class DeepSpeech2Trainer(Trainer):
 
 
 class DeepSpeech2Tester(DeepSpeech2Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # testing config
-        default = CfgNode(
-            dict(
-                alpha=2.5,  # Coef of LM for beam search.
-                beta=0.3,  # Coef of WC for beam search.
-                cutoff_prob=1.0,  # Cutoff probability for pruning.
-                cutoff_top_n=40,  # Cutoff number for pruning.
-                lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',  # Filepath for language model.
-                decoding_method='ctc_beam_search',  # Decoding method. Options: ctc_beam_search, ctc_greedy
-                error_rate_type='wer',  # Error rate type for evaluation. Options `wer`, 'cer'
-                num_proc_bsearch=8,  # # of CPUs for beam search.
-                beam_size=500,  # Beam search width.
-                batch_size=128,  # decoding batch size
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
 
         self._text_featurizer = TextFeaturizer(
diff --git a/examples/ted_en_zh/st0/conf/preprocess.yaml b/examples/ted_en_zh/st0/conf/preprocess.yaml
new file mode 100644
index 00000000..d3992cb9
--- /dev/null
+++ b/examples/ted_en_zh/st0/conf/preprocess.yaml
@@ -0,0 +1,25 @@
+process:
+  # extract kaldi fbank from PCM
+  - type: fbank_kaldi
+    fs: 16000
+    n_mels: 80
+    n_shift: 160
+    win_length: 400
+    dither: 0.1
+  - type: cmvn_json
+    cmvn_path: data/mean_std.json
+  # these three processes are a.k.a. SpecAugment
+  - type: time_warp
+    max_time_warp: 5
+    inplace: true
+    mode: PIL
+  - type: freq_mask
+    F: 30
+    n_mask: 2
+    inplace: true
+    replace_with_zero: false
+  - type: time_mask
+    T: 40
+    n_mask: 2
+    inplace: true
+    replace_with_zero: false
diff --git a/examples/ted_en_zh/st0/conf/transformer.yaml b/examples/ted_en_zh/st0/conf/transformer.yaml
index a154621d..d113fc94 100644
--- a/examples/ted_en_zh/st0/conf/transformer.yaml
+++ b/examples/ted_en_zh/st0/conf/transformer.yaml
@@ -19,7 +19,7 @@ vocab_filepath: data/lang_char/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: data/lang_char/bpe_unigram_8000
 mean_std_filepath: ""
-augmentation_config: conf/preprocess.yaml
+preprocess_config: conf/preprocess.yaml
 batch_size: 16
 maxlen_in: 5  # if input length  > maxlen-in, batchsize is automatically reduced
 maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
@@ -87,7 +87,7 @@ global_grad_clip: 5.0
 optim: adam
 optim_conf:
   lr: 2.5
-  weight_decay: 1e-06
+  weight_decay: 1.0e-06
 scheduler: noam    
 scheduler_conf:
   warmup_steps: 25000
diff --git a/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml b/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
index c80dea7e..a01ec1a6 100644
--- a/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
+++ b/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
@@ -19,7 +19,7 @@ vocab_filepath: data/lang_char/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: data/lang_char/bpe_unigram_8000
 mean_std_filepath: ""
-augmentation_config: conf/preprocess.yaml
+preprocess_config: conf/preprocess.yaml
 batch_size: 16
 maxlen_in: 5  # if input length  > maxlen-in, batchsize is automatically reduced
 maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
diff --git a/examples/ted_en_zh/st1/conf/preprocess.yaml b/examples/ted_en_zh/st1/conf/preprocess.yaml
new file mode 100644
index 00000000..bc86d98c
--- /dev/null
+++ b/examples/ted_en_zh/st1/conf/preprocess.yaml
@@ -0,0 +1,16 @@
+process:
+  # these three processes are a.k.a. SpecAugment
+  - type: time_warp
+    max_time_warp: 5
+    inplace: true
+    mode: PIL
+  - type: freq_mask
+    F: 30
+    n_mask: 2
+    inplace: true
+    replace_with_zero: false
+  - type: time_mask
+    T: 40
+    n_mask: 2
+    inplace: true
+    replace_with_zero: false
diff --git a/examples/ted_en_zh/st1/conf/transformer.yaml b/examples/ted_en_zh/st1/conf/transformer.yaml
index 05674562..515edee2 100644
--- a/examples/ted_en_zh/st1/conf/transformer.yaml
+++ b/examples/ted_en_zh/st1/conf/transformer.yaml
@@ -13,7 +13,7 @@ vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
 unit_type: 'spm'
 spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
 mean_std_filepath: ""
-# augmentation_config: conf/augmentation.json
+# preprocess_config: conf/augmentation.json
 batch_size: 20
 feat_dim: 83
 stride_ms: 10.0
@@ -27,7 +27,7 @@ batch_bins: 0
 batch_frames_in: 0
 batch_frames_out: 0
 batch_frames_inout: 0
-augmentation_config:
+preprocess_config:
 num_workers: 0
 subsampling_factor: 1
 num_encs: 1
diff --git a/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml b/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
index ae246cbf..a5f956fa 100644
--- a/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
+++ b/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
@@ -13,7 +13,7 @@ vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
 unit_type: 'spm'
 spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
 mean_std_filepath: ""
-# augmentation_config: conf/augmentation.json
+# preprocess_config: conf/augmentation.json
 batch_size: 20
 feat_dim: 83
 stride_ms: 10.0
@@ -27,7 +27,7 @@ batch_bins: 0
 batch_frames_in: 0
 batch_frames_out: 0
 batch_frames_inout: 0
-augmentation_config:
+preprocess_config:
 num_workers: 0
 subsampling_factor: 1
 num_encs: 1
diff --git a/examples/ted_en_zh/st1/local/test.sh b/examples/ted_en_zh/st1/local/test.sh
index 9e24380d..be6f25f9 100755
--- a/examples/ted_en_zh/st1/local/test.sh
+++ b/examples/ted_en_zh/st1/local/test.sh
@@ -20,12 +20,7 @@ for type in fullsentence; do
     --decode_cfg ${decode_config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
-<<<<<<< HEAD
-    --opts decode.decoding_method ${type} \
-    --opts decode.decode_batch_size ${batch_size}
-=======
     --opts decoding.decoding_method ${type} \
->>>>>>> 6272496d9c26736750b577fd832ea9dd4ddc4e6e
 
     if [ $? -ne 0 ]; then
         echo "Failed in evaluation!"
diff --git a/examples/tiny/asr1/conf/chunk_confermer.yaml b/examples/tiny/asr1/conf/chunk_confermer.yaml
index cd072c14..8f785121 100644
--- a/examples/tiny/asr1/conf/chunk_confermer.yaml
+++ b/examples/tiny/asr1/conf/chunk_confermer.yaml
@@ -58,7 +58,6 @@ mean_std_filepath: ""
 vocab_filepath: data/lang_char/vocab.txt 
 unit_type: 'spm'
 spm_model_prefix: 'data/lang_char/bpe_unigram_200'
-preprocess_config: conf/preprocess.yaml
 feat_dim: 80
 stride_ms: 10.0
 window_ms: 25.0
@@ -72,7 +71,7 @@ batch_bins: 0
 batch_frames_in: 0
 batch_frames_out: 0
 batch_frames_inout: 0
-augmentation_config: conf/preprocess.yaml 
+preprocess_config: conf/preprocess.yaml 
 num_workers: 0
 subsampling_factor: 1
 num_encs: 1
diff --git a/paddlespeech/s2t/decoders/recog.py b/paddlespeech/s2t/decoders/recog.py
index 3e9939f0..88955eac 100644
--- a/paddlespeech/s2t/decoders/recog.py
+++ b/paddlespeech/s2t/decoders/recog.py
@@ -85,7 +85,7 @@ def recog_v2(args):
         mode="asr",
         load_output=False,
         sort_in_input_length=False,
-        preprocess_conf=confs.collator.augmentation_config
+        preprocess_conf=confs.preprocess_config
         if args.preprocess_conf is None else args.preprocess_conf,
         preprocess_args={"train": False}, )
 
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py b/paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py
index 88148323..ccb85906 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py
@@ -20,7 +20,7 @@ from paddle.inference import Config
 from paddle.inference import create_predictor
 from paddle.io import DataLoader
 
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.io.collator import SpeechCollator
 from paddlespeech.s2t.io.dataset import ManifestDataset
 from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
@@ -176,7 +176,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py b/paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py
index dea6d975..85c2466f 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py
@@ -18,7 +18,7 @@ import numpy as np
 import paddle
 from paddle.io import DataLoader
 
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.io.collator import SpeechCollator
 from paddlespeech.s2t.io.dataset import ManifestDataset
 from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
@@ -111,7 +111,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/export.py b/paddlespeech/s2t/exps/deepspeech2/bin/export.py
index 66042e84..090b5fab 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/export.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/export.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Export for DeepSpeech2 model."""
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -41,7 +41,7 @@ if __name__ == "__main__":
     print_arguments(args)
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults(args.model_type)
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.opts:
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/test.py b/paddlespeech/s2t/exps/deepspeech2/bin/test.py
index 7ce921d6..388b380d 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/test.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/test.py
@@ -14,7 +14,6 @@
 """Evaluation for DeepSpeech2 model."""
 from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
 from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -43,7 +42,7 @@ if __name__ == "__main__":
     print("model_type:{}".format(args.model_type))
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults(args.model_type)
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/test_export.py b/paddlespeech/s2t/exps/deepspeech2/bin/test_export.py
index 7a1801d4..176028ed 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/test_export.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/test_export.py
@@ -13,8 +13,6 @@
 # limitations under the License.
 """Evaluation for DeepSpeech2 model."""
 from yacs.config import CfgNode
-
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
 from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2ExportTester as ExportTester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -48,7 +46,7 @@ if __name__ == "__main__":
     print("model_type:{}".format(args.model_type))
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults(args.model_type)
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py b/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
index 28756b05..e2cb7e2f 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
@@ -188,7 +188,7 @@ if __name__ == "__main__":
     print("model_type:{}".format(args.model_type))
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults(args.model_type)
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/train.py b/paddlespeech/s2t/exps/deepspeech2/bin/train.py
index 400538f9..5e8c0fff 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/train.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/train.py
@@ -14,7 +14,7 @@
 """Trainer for DeepSpeech2 model."""
 from paddle import distributed as dist
 
-from paddlespeech.s2t.exps.deepspeech2.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Trainer as Trainer
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -42,7 +42,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults(args.model_type)
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.opts:
diff --git a/paddlespeech/s2t/exps/deepspeech2/config.py b/paddlespeech/s2t/exps/deepspeech2/config.py
deleted file mode 100644
index d8eab50e..00000000
--- a/paddlespeech/s2t/exps/deepspeech2/config.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from yacs.config import CfgNode
-
-from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester
-from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Trainer
-from paddlespeech.s2t.io.collator import SpeechCollator
-from paddlespeech.s2t.io.dataset import ManifestDataset
-from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
-from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
-
-
-def get_cfg_defaults(model_type='offline'):
-    _C = CfgNode()
-    config = _C.clone()
-    config.set_new_allowed(True)
-    return config
diff --git a/paddlespeech/s2t/exps/deepspeech2/model.py b/paddlespeech/s2t/exps/deepspeech2/model.py
index fc214a8a..e7d5e20f 100644
--- a/paddlespeech/s2t/exps/deepspeech2/model.py
+++ b/paddlespeech/s2t/exps/deepspeech2/model.py
@@ -49,22 +49,6 @@ logger = Log(__name__).getlog()
 
 
 class DeepSpeech2Trainer(Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # training config
-        default = CfgNode(
-            dict(
-                lr=5e-4,  # learning rate
-                lr_decay=1.0,  # learning rate decay
-                weight_decay=1e-6,  # the coeff of weight decay
-                global_grad_clip=5.0,  # the global norm clip
-                n_epoch=50,  # train epochs
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
 
@@ -259,27 +243,6 @@ class DeepSpeech2Trainer(Trainer):
 
 
 class DeepSpeech2Tester(DeepSpeech2Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # testing config
-        default = CfgNode(
-            dict(
-                alpha=2.5,  # Coef of LM for beam search.
-                beta=0.3,  # Coef of WC for beam search.
-                cutoff_prob=1.0,  # Cutoff probability for pruning.
-                cutoff_top_n=40,  # Cutoff number for pruning.
-                lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',  # Filepath for language model.
-                decoding_method='ctc_beam_search',  # Decoding method. Options: ctc_beam_search, ctc_greedy
-                error_rate_type='wer',  # Error rate type for evaluation. Options `wer`, 'cer'
-                num_proc_bsearch=8,  # # of CPUs for beam search.
-                beam_size=500,  # Beam search width.
-                batch_size=128,  # decoding batch size
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
         self._text_featurizer = TextFeaturizer(
diff --git a/paddlespeech/s2t/exps/u2/bin/alignment.py b/paddlespeech/s2t/exps/u2/bin/alignment.py
index 5d768536..e3390feb 100644
--- a/paddlespeech/s2t/exps/u2/bin/alignment.py
+++ b/paddlespeech/s2t/exps/u2/bin/alignment.py
@@ -14,7 +14,6 @@
 """Alignment for U2 model."""
 from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.u2.config import get_cfg_defaults
 from paddlespeech.s2t.exps.u2.model import U2Tester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -33,14 +32,14 @@ def main(config, args):
 
 if __name__ == "__main__":
     parser = default_argument_parser()
-    # save asr result to 
+    # file path to save the asr result to
     parser.add_argument(
         "--result_file", type=str, help="path of save the asr result")
     args = parser.parse_args()
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/u2/bin/export.py b/paddlespeech/s2t/exps/u2/bin/export.py
index 44fc7c3e..3907cebd 100644
--- a/paddlespeech/s2t/exps/u2/bin/export.py
+++ b/paddlespeech/s2t/exps/u2/bin/export.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Export for U2 model."""
-from paddlespeech.s2t.exps.u2.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.exps.u2.model import U2Tester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -31,14 +31,14 @@ def main(config, args):
 
 if __name__ == "__main__":
     parser = default_argument_parser()
-    # save jit model to 
+    # path to save the jit model to
     parser.add_argument(
         "--export_path", type=str, help="path of the jit model to save")
     args = parser.parse_args()
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.opts:
diff --git a/paddlespeech/s2t/exps/u2/bin/test.py b/paddlespeech/s2t/exps/u2/bin/test.py
index d93954fe..f14d804f 100644
--- a/paddlespeech/s2t/exps/u2/bin/test.py
+++ b/paddlespeech/s2t/exps/u2/bin/test.py
@@ -16,7 +16,6 @@ import cProfile
 
 from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.u2.config import get_cfg_defaults
 from paddlespeech.s2t.exps.u2.model import U2Tester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -44,7 +43,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/u2/bin/test_wav.py b/paddlespeech/s2t/exps/u2/bin/test_wav.py
index 554d6ca5..9904813a 100644
--- a/paddlespeech/s2t/exps/u2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/u2/bin/test_wav.py
@@ -20,7 +20,6 @@ import paddle
 import soundfile
 from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.u2.config import get_cfg_defaults
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.models.u2 import U2Model
 from paddlespeech.s2t.training.cli import default_argument_parser
@@ -129,7 +128,7 @@ if __name__ == "__main__":
         "--audio_file", type=str, help="path of the input audio file")
     args = parser.parse_args()
 
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/u2/bin/train.py b/paddlespeech/s2t/exps/u2/bin/train.py
index d6ee8b30..d562278f 100644
--- a/paddlespeech/s2t/exps/u2/bin/train.py
+++ b/paddlespeech/s2t/exps/u2/bin/train.py
@@ -17,7 +17,7 @@ import os
 
 from paddle import distributed as dist
 
-from paddlespeech.s2t.exps.u2.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.exps.u2.model import U2Trainer as Trainer
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.opts:
diff --git a/paddlespeech/s2t/exps/u2/model.py b/paddlespeech/s2t/exps/u2/model.py
index f1683d70..d0cea031 100644
--- a/paddlespeech/s2t/exps/u2/model.py
+++ b/paddlespeech/s2t/exps/u2/model.py
@@ -46,33 +46,6 @@ logger = Log(__name__).getlog()
 
 
 class U2Trainer(Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # training config
-        default = CfgNode(
-            dict(
-                n_epoch=50,  # train epochs
-                log_interval=100,  # steps
-                accum_grad=1,  # accum grad by # steps
-                global_grad_clip=5.0,  # the global norm clip
-            ))
-        default.optim = 'adam'
-        default.optim_conf = CfgNode(
-            dict(
-                lr=5e-4,  # learning rate
-                weight_decay=1e-6,  # the coeff of weight decay
-            ))
-        default.scheduler = 'warmuplr'
-        default.scheduler_conf = CfgNode(
-            dict(
-                warmup_steps=25000,
-                lr_decay=1.0,  # learning rate decay
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
 
@@ -401,35 +374,6 @@ class U2Trainer(Trainer):
 
 
 class U2Tester(U2Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # decoding config
-        default = CfgNode(
-            dict(
-                alpha=2.5,  # Coef of LM for beam search.
-                beta=0.3,  # Coef of WC for beam search.
-                cutoff_prob=1.0,  # Cutoff probability for pruning.
-                cutoff_top_n=40,  # Cutoff number for pruning.
-                lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',  # Filepath for language model.
-                decoding_method='attention',  # Decoding method. Options: 'attention', 'ctc_greedy_search',
-                # 'ctc_prefix_beam_search', 'attention_rescoring'
-                error_rate_type='wer',  # Error rate type for evaluation. Options `wer`, 'cer'
-                num_proc_bsearch=8,  # # of CPUs for beam search.
-                beam_size=10,  # Beam search width.
-                decode_batch_size=16,  # decoding batch size
-                ctc_weight=0.0,  # ctc weight for attention rescoring decode mode.
-                decoding_chunk_size=-1,  # decoding chunk size. Defaults to -1.
-                # <0: for decoding, use full chunk.
-                # >0: for decoding, use fixed chunk size as set.
-                # 0: used for training, it's prohibited here.
-                num_decoding_left_chunks=-1,  # number of left chunks for decoding. Defaults to -1.
-                simulate_streaming=False,  # simulate streaming inference. Defaults to False.
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
         self.text_feature = TextFeaturizer(
diff --git a/paddlespeech/s2t/exps/u2/trainer.py b/paddlespeech/s2t/exps/u2/trainer.py
index 57d87316..ab87c30d 100644
--- a/paddlespeech/s2t/exps/u2/trainer.py
+++ b/paddlespeech/s2t/exps/u2/trainer.py
@@ -55,7 +55,6 @@ class U2Trainer(Trainer):
 
         collate_fn_train = SpeechCollator.from_config(config)
 
-        config.augmentation_config = ""
         collate_fn_dev = SpeechCollator.from_config(config)
 
         if self.parallel:
@@ -103,7 +102,6 @@ class U2Trainer(Trainer):
         test_dataset = ManifestDataset.from_config(config)
         # return text ord id
         config.keep_transcription_text = True
-        config.augmentation_config = ""
         self.test_loader = DataLoader(
             test_dataset,
             batch_size=config.decode.batch_size,
diff --git a/paddlespeech/s2t/exps/u2_kaldi/model.py b/paddlespeech/s2t/exps/u2_kaldi/model.py
index 887dd29e..780c5c08 100644
--- a/paddlespeech/s2t/exps/u2_kaldi/model.py
+++ b/paddlespeech/s2t/exps/u2_kaldi/model.py
@@ -42,40 +42,7 @@ from paddlespeech.s2t.utils.utility import UpdateConfig
 
 logger = Log(__name__).getlog()
 
-
-def get_cfg_defaults():
-    """Get a yacs CfgNode object with default values for my_project."""
-    # Return a clone so that the defaults will not be altered
-    # This is for the "local variable" use pattern
-    _C = CfgNode()
-
-    _C.model = U2Model.params()
-
-    _C.training = U2Trainer.params()
-
-    _C.decoding = U2Tester.params()
-
-    config = _C.clone()
-    config.set_new_allowed(True)
-    return config
-
-
 class U2Trainer(Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # training config
-        default = CfgNode(
-            dict(
-                n_epoch=50,  # train epochs
-                log_interval=100,  # steps
-                accum_grad=1,  # accum grad by # steps
-                checkpoint=dict(
-                    kbest_n=50,
-                    latest_n=5, ), ))
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
 
@@ -362,35 +329,6 @@ class U2Trainer(Trainer):
 
 
 class U2Tester(U2Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # decoding config
-        default = CfgNode(
-            dict(
-                alpha=2.5,  # Coef of LM for beam search.
-                beta=0.3,  # Coef of WC for beam search.
-                cutoff_prob=1.0,  # Cutoff probability for pruning.
-                cutoff_top_n=40,  # Cutoff number for pruning.
-                lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',  # Filepath for language model.
-                decoding_method='attention',  # Decoding method. Options: 'attention', 'ctc_greedy_search',
-                # 'ctc_prefix_beam_search', 'attention_rescoring'
-                error_rate_type='wer',  # Error rate type for evaluation. Options `wer`, 'cer'
-                num_proc_bsearch=8,  # # of CPUs for beam search.
-                beam_size=10,  # Beam search width.
-                batch_size=16,  # decoding batch size
-                ctc_weight=0.0,  # ctc weight for attention rescoring decode mode.
-                decoding_chunk_size=-1,  # decoding chunk size. Defaults to -1.
-                # <0: for decoding, use full chunk.
-                # >0: for decoding, use fixed chunk size as set.
-                # 0: used for training, it's prohibited here.
-                num_decoding_left_chunks=-1,  # number of left chunks for decoding. Defaults to -1.
-                simulate_streaming=False,  # simulate streaming inference. Defaults to False.
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
         self.text_feature = TextFeaturizer(
diff --git a/paddlespeech/s2t/exps/u2_st/bin/export.py b/paddlespeech/s2t/exps/u2_st/bin/export.py
index 69d9718f..1bc4e1f3 100644
--- a/paddlespeech/s2t/exps/u2_st/bin/export.py
+++ b/paddlespeech/s2t/exps/u2_st/bin/export.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Export for U2 model."""
-from paddlespeech.s2t.exps.u2_st.config import get_cfg_defaults
+from yacs.config import CfgNode
 from paddlespeech.s2t.exps.u2_st.model import U2STTester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -31,14 +31,14 @@ def main(config, args):
 
 if __name__ == "__main__":
     parser = default_argument_parser()
-    # save jit model to 
+    # destination path for the exported jit model
     parser.add_argument(
         "--export_path", type=str, help="path of the jit model to save")
     args = parser.parse_args()
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.opts:
diff --git a/paddlespeech/s2t/exps/u2_st/bin/test.py b/paddlespeech/s2t/exps/u2_st/bin/test.py
index 3ad5fc7d..1d70a310 100644
--- a/paddlespeech/s2t/exps/u2_st/bin/test.py
+++ b/paddlespeech/s2t/exps/u2_st/bin/test.py
@@ -16,7 +16,6 @@ import cProfile
 
 from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.u2_st.config import get_cfg_defaults
 from paddlespeech.s2t.exps.u2_st.model import U2STTester as Tester
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -44,7 +43,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.decode_cfg:
diff --git a/paddlespeech/s2t/exps/u2_st/bin/train.py b/paddlespeech/s2t/exps/u2_st/bin/train.py
index 58496c88..4dec9ec8 100644
--- a/paddlespeech/s2t/exps/u2_st/bin/train.py
+++ b/paddlespeech/s2t/exps/u2_st/bin/train.py
@@ -16,8 +16,8 @@ import cProfile
 import os
 
 from paddle import distributed as dist
+from yacs.config import CfgNode
 
-from paddlespeech.s2t.exps.u2_st.config import get_cfg_defaults
 from paddlespeech.s2t.exps.u2_st.model import U2STTrainer as Trainer
 from paddlespeech.s2t.training.cli import default_argument_parser
 from paddlespeech.s2t.utils.utility import print_arguments
@@ -42,7 +42,7 @@ if __name__ == "__main__":
     print_arguments(args, globals())
 
     # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
+    config = CfgNode(new_allowed=True)
     if args.config:
         config.merge_from_file(args.config)
     if args.opts:
diff --git a/paddlespeech/s2t/exps/u2_st/config.py b/paddlespeech/s2t/exps/u2_st/config.py
deleted file mode 100644
index a314a1ca..00000000
--- a/paddlespeech/s2t/exps/u2_st/config.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from yacs.config import CfgNode
-
-from paddlespeech.s2t.exps.u2_st.model import U2STTester
-from paddlespeech.s2t.exps.u2_st.model import U2STTrainer
-from paddlespeech.s2t.io.collator import SpeechCollator
-from paddlespeech.s2t.io.dataset import ManifestDataset
-from paddlespeech.s2t.models.u2_st import U2STModel
-
-_C = CfgNode()
-
-# _C.data = ManifestDataset.params()
-
-# _C.collator = SpeechCollator.params()
-
-# _C.model = U2STModel.params()
-
-# _C.training = U2STTrainer.params()
-
-# _C.decoding = U2STTester.params()
-
-
-def get_cfg_defaults():
-    """Get a yacs CfgNode object with default values for my_project."""
-    # Return a clone so that the defaults will not be altered
-    # This is for the "local variable" use pattern
-    config = _C.clone()
-    config.set_new_allowed(True)
-    return config
diff --git a/paddlespeech/s2t/exps/u2_st/model.py b/paddlespeech/s2t/exps/u2_st/model.py
index 88aeb4a5..ca2c2c1d 100644
--- a/paddlespeech/s2t/exps/u2_st/model.py
+++ b/paddlespeech/s2t/exps/u2_st/model.py
@@ -45,33 +45,6 @@ logger = Log(__name__).getlog()
 
 
 class U2STTrainer(Trainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # training config
-        default = CfgNode(
-            dict(
-                n_epoch=50,  # train epochs
-                log_interval=100,  # steps
-                accum_grad=1,  # accum grad by # steps
-                global_grad_clip=5.0,  # the global norm clip
-            ))
-        default.optim = 'adam'
-        default.optim_conf = CfgNode(
-            dict(
-                lr=5e-4,  # learning rate
-                weight_decay=1e-6,  # the coeff of weight decay
-            ))
-        default.scheduler = 'warmuplr'
-        default.scheduler_conf = CfgNode(
-            dict(
-                warmup_steps=25000,
-                lr_decay=1.0,  # learning rate decay
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
 
@@ -127,7 +100,7 @@ class U2STTrainer(Trainer):
 
         for k, v in losses_np.items():
             report(k, v)
-        report("batch_size", self.config.collator.batch_size)
+        report("batch_size", self.config.batch_size)
         report("accum", train_conf.accum_grad)
         report("step_cost", iteration_time)
 
@@ -236,7 +209,7 @@ class U2STTrainer(Trainer):
                             msg += ","
                         msg = msg[:-1]  # remove the last ","
                         if (batch_index + 1
-                            ) % self.config.training.log_interval == 0:
+                            ) % self.config.log_interval == 0:
                             logger.info(msg)
                 except Exception as e:
                     logger.error(e)
@@ -287,7 +260,7 @@ class U2STTrainer(Trainer):
                 batch_frames_in=0,
                 batch_frames_out=0,
                 batch_frames_inout=0,
-                preprocess_conf=config.augmentation_config,  # aug will be off when train_mode=False
+                preprocess_conf=config.preprocess_config,  # aug will be off when train_mode=False
                 n_iter_processes=config.num_workers,
                 subsampling_factor=1,
                 load_aux_output=load_transcript,
@@ -308,7 +281,7 @@ class U2STTrainer(Trainer):
                 batch_frames_in=0,
                 batch_frames_out=0,
                 batch_frames_inout=0,
-                preprocess_conf=config.augmentation_config,  # aug will be off when train_mode=False
+                preprocess_conf=config.preprocess_config,  # aug will be off when train_mode=False
                 n_iter_processes=config.num_workers,
                 subsampling_factor=1,
                 load_aux_output=load_transcript,
@@ -319,7 +292,7 @@ class U2STTrainer(Trainer):
             # test dataset, return raw text
             decode_batch_size = config.get('decode',dict()).get('decode_batch_size', 1)
             self.test_loader = BatchDataLoader(
-                json_file=config.data.test_manifest,
+                json_file=config.test_manifest,
                 train_mode=False,
                 sortagrad=False,
                 batch_size=decode_batch_size,
@@ -332,7 +305,7 @@ class U2STTrainer(Trainer):
                 batch_frames_in=0,
                 batch_frames_out=0,
                 batch_frames_inout=0,
-                preprocess_conf=config.augmentation_config,  # aug will be off when train_mode=False
+                preprocess_conf=config.preprocess_config,  # aug will be off when train_mode=False
                 n_iter_processes=config.num_workers,
                 subsampling_factor=1,
                 num_encs=1,
@@ -379,7 +352,7 @@ class U2STTrainer(Trainer):
                 config,
                 parameters,
                 lr_scheduler=None, ):
-            train_config = config.training
+            train_config = config
             optim_type = train_config.optim
             optim_conf = train_config.optim_conf
             scheduler_type = train_config.scheduler
@@ -405,41 +378,12 @@ class U2STTrainer(Trainer):
 
 
 class U2STTester(U2STTrainer):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # decoding config
-        default = CfgNode(
-            dict(
-                alpha=2.5,  # Coef of LM for beam search.
-                beta=0.3,  # Coef of WC for beam search.
-                cutoff_prob=1.0,  # Cutoff probability for pruning.
-                cutoff_top_n=40,  # Cutoff number for pruning.
-                lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm',  # Filepath for language model.
-                decoding_method='attention',  # Decoding method. Options: 'attention', 'ctc_greedy_search',
-                # 'ctc_prefix_beam_search', 'attention_rescoring'
-                error_rate_type='bleu',  # Error rate type for evaluation. Options `bleu`, 'char_bleu'
-                num_proc_bsearch=8,  # # of CPUs for beam search.
-                beam_size=10,  # Beam search width.
-                batch_size=16,  # decoding batch size
-                ctc_weight=0.0,  # ctc weight for attention rescoring decode mode.
-                decoding_chunk_size=-1,  # decoding chunk size. Defaults to -1.
-                # <0: for decoding, use full chunk.
-                # >0: for decoding, use fixed chunk size as set.
-                # 0: used for training, it's prohibited here.
-                num_decoding_left_chunks=-1,  # number of left chunks for decoding. Defaults to -1.
-                simulate_streaming=False,  # simulate streaming inference. Defaults to False.
-            ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self, config, args):
         super().__init__(config, args)
         self.text_feature = TextFeaturizer(
-            unit_type=self.config.collator.unit_type,
-            vocab_filepath=self.config.collator.vocab_filepath,
-            spm_model_prefix=self.config.collator.spm_model_prefix)
+            unit_type=self.config.unit_type,
+            vocab=self.config.vocab_filepath,
+            spm_model_prefix=self.config.spm_model_prefix)
         self.vocab_list = self.text_feature.vocab_list
 
     def id2token(self, texts, texts_len, text_feature):
@@ -526,7 +470,7 @@ class U2STTester(U2STTrainer):
         decode_cfg = self.config.decode
         bleu_func = bleu_score.char_bleu if decode_cfg.error_rate_type == 'char-bleu' else bleu_score.bleu
 
-        stride_ms = self.config.collator.stride_ms
+        stride_ms = self.config.stride_ms
         hyps, refs = [], []
         len_refs, num_ins = 0, 0
         num_frames = 0.0
diff --git a/paddlespeech/s2t/io/collator.py b/paddlespeech/s2t/io/collator.py
index 27bf20eb..3a14b2d5 100644
--- a/paddlespeech/s2t/io/collator.py
+++ b/paddlespeech/s2t/io/collator.py
@@ -219,33 +219,6 @@ class SpeechCollatorBase():
 
 
 class SpeechCollator(SpeechCollatorBase):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        default = CfgNode(
-            dict(
-                augmentation_config="",
-                random_seed=0,
-                mean_std_filepath="",
-                unit_type="char",
-                vocab_filepath="",
-                spm_model_prefix="",
-                spectrum_type='linear',  # 'linear', 'mfcc', 'fbank'
-                feat_dim=0,  # 'mfcc', 'fbank'
-                delta_delta=False,  # 'mfcc', 'fbank'
-                stride_ms=10.0,  # ms
-                window_ms=20.0,  # ms
-                n_fft=None,  # fft points
-                max_freq=None,  # None for samplerate/2
-                target_sample_rate=16000,  # target sample rate
-                use_dB_normalization=True,
-                target_dB=-20,
-                dither=1.0,  # feature dither
-                keep_transcription_text=False))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     @classmethod
     def from_config(cls, config):
         """Build a SpeechCollator object from a config.
diff --git a/paddlespeech/s2t/io/dataset.py b/paddlespeech/s2t/io/dataset.py
index c76ccfce..9149fb27 100644
--- a/paddlespeech/s2t/io/dataset.py
+++ b/paddlespeech/s2t/io/dataset.py
@@ -28,22 +28,6 @@ logger = Log(__name__).getlog()
 
 
 class ManifestDataset(Dataset):
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        default = CfgNode(
-            dict(
-                manifest="",
-                max_input_len=27.0,
-                min_input_len=0.0,
-                max_output_len=float('inf'),
-                min_output_len=0.0,
-                max_output_input_ratio=float('inf'),
-                min_output_input_ratio=0.0, ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     @classmethod
     def from_config(cls, config):
         """Build a ManifestDataset object from a config.
diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
index 0414d04f..ddc3612d 100644
--- a/paddlespeech/s2t/models/ds2/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -119,21 +119,6 @@ class DeepSpeech2Model(nn.Layer):
              before softmax) and a ctc cost layer.
     :rtype: tuple of LayerOutput
     """
-
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        default = CfgNode(
-            dict(
-                num_conv_layers=2,  #Number of stacking convolution layers.
-                num_rnn_layers=3,  #Number of stacking RNN layers.
-                rnn_layer_size=1024,  #RNN layer size (number of RNN cells).
-                use_gru=True,  #Use gru if set True. Use simple rnn if set False.
-                share_rnn_weights=True,  #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.
-                ctc_grad_norm_type=None, ))
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self,
                  feat_size,
                  dict_size,
diff --git a/paddlespeech/s2t/models/ds2_online/deepspeech2.py b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
index f08e30d0..aae77f74 100644
--- a/paddlespeech/s2t/models/ds2_online/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
@@ -243,23 +243,6 @@ class DeepSpeech2ModelOnline(nn.Layer):
              before softmax) and a ctc cost layer.
     :rtype: tuple of LayerOutput
     """
-
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        default = CfgNode(
-            dict(
-                num_conv_layers=2,  #Number of stacking convolution layers.
-                num_rnn_layers=4,  #Number of stacking RNN layers.
-                rnn_layer_size=1024,  #RNN layer size (number of RNN cells).
-                num_fc_layers=2,
-                fc_layers_size_list=[512, 256],
-                use_gru=True,  #Use gru if set True. Use simple rnn if set False.
-                blank_id=0,  # index of blank in vocob.txt
-                ctc_grad_norm_type=None, ))
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(
             self,
             feat_size,
diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index 83eff467..26e81acf 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -59,57 +59,6 @@ logger = Log(__name__).getlog()
 
 class U2BaseModel(ASRInterface, nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
-
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # network architecture
-        default = CfgNode()
-        # allow add new item when merge_with_file
-        default.cmvn_file = ""
-        default.cmvn_file_type = "json"
-        default.input_dim = 0
-        default.output_dim = 0
-        # encoder related
-        default.encoder = 'transformer'
-        default.encoder_conf = CfgNode(
-            dict(
-                output_size=256,  # dimension of attention
-                attention_heads=4,
-                linear_units=2048,  # the number of units of position-wise feed forward
-                num_blocks=12,  # the number of encoder blocks
-                dropout_rate=0.1,
-                positional_dropout_rate=0.1,
-                attention_dropout_rate=0.0,
-                input_layer='conv2d',  # encoder input type, you can chose conv2d, conv2d6 and conv2d8
-                normalize_before=True,
-                # use_cnn_module=True,
-                # cnn_module_kernel=15,
-                # activation_type='swish',
-                # pos_enc_layer_type='rel_pos',
-                # selfattention_layer_type='rel_selfattn',
-            ))
-        # decoder related
-        default.decoder = 'transformer'
-        default.decoder_conf = CfgNode(
-            dict(
-                attention_heads=4,
-                linear_units=2048,
-                num_blocks=6,
-                dropout_rate=0.1,
-                positional_dropout_rate=0.1,
-                self_attention_dropout_rate=0.0,
-                src_attention_dropout_rate=0.0, ))
-        # hybrid CTC/attention
-        default.model_conf = CfgNode(
-            dict(
-                ctc_weight=0.3,
-                lsm_weight=0.1,  # label smoothing option
-                length_normalized_loss=False, ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self,
                  vocab_size: int,
                  encoder: TransformerEncoder,
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index 8b07e389..1c5596ba 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """U2 ASR Model
-Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition 
+Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition
 (https://arxiv.org/pdf/2012.05481.pdf)
 """
 import time
@@ -51,58 +51,6 @@ logger = Log(__name__).getlog()
 
 class U2STBaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
-
-    @classmethod
-    def params(cls, config: Optional[CfgNode]=None) -> CfgNode:
-        # network architecture
-        default = CfgNode()
-        # allow add new item when merge_with_file
-        default.cmvn_file = ""
-        default.cmvn_file_type = "json"
-        default.input_dim = 0
-        default.output_dim = 0
-        # encoder related
-        default.encoder = 'transformer'
-        default.encoder_conf = CfgNode(
-            dict(
-                output_size=256,  # dimension of attention
-                attention_heads=4,
-                linear_units=2048,  # the number of units of position-wise feed forward
-                num_blocks=12,  # the number of encoder blocks
-                dropout_rate=0.1,
-                positional_dropout_rate=0.1,
-                attention_dropout_rate=0.0,
-                input_layer='conv2d',  # encoder input type, you can chose conv2d, conv2d6 and conv2d8
-                normalize_before=True,
-                # use_cnn_module=True,
-                # cnn_module_kernel=15,
-                # activation_type='swish',
-                # pos_enc_layer_type='rel_pos',
-                # selfattention_layer_type='rel_selfattn', 
-            ))
-        # decoder related
-        default.decoder = 'transformer'
-        default.decoder_conf = CfgNode(
-            dict(
-                attention_heads=4,
-                linear_units=2048,
-                num_blocks=6,
-                dropout_rate=0.1,
-                positional_dropout_rate=0.1,
-                self_attention_dropout_rate=0.0,
-                src_attention_dropout_rate=0.0, ))
-        # hybrid CTC/attention
-        default.model_conf = CfgNode(
-            dict(
-                asr_weight=0.0,
-                ctc_weight=0.0,
-                lsm_weight=0.1,  # label smoothing option
-                length_normalized_loss=False, ))
-
-        if config is not None:
-            config.merge_from_other_cfg(default)
-        return default
-
     def __init__(self,
                  vocab_size: int,
                  encoder: TransformerEncoder,
@@ -289,8 +237,8 @@ class U2STBaseModel(nn.Layer):
             simulate_streaming (bool, optional): streaming or not. Defaults to False.
 
         Returns:
-            Tuple[paddle.Tensor, paddle.Tensor]: 
-                encoder hiddens (B, Tmax, D), 
+            Tuple[paddle.Tensor, paddle.Tensor]:
+                encoder hiddens (B, Tmax, D),
                 encoder hiddens mask (B, 1, Tmax).
         """
         # Let's assume B = batch_size
@@ -533,21 +481,21 @@ class U2STBaseModel(nn.Layer):
             feats (Tenosr): audio features, (B, T, D)
             feats_lengths (Tenosr): (B)
             text_feature (TextFeaturizer): text feature object.
-            decoding_method (str): decoding mode, e.g. 
-                    'fullsentence', 
+            decoding_method (str): decoding mode, e.g.
+                    'fullsentence',
                     'simultaneous'
             beam_size (int): beam size for search
             decoding_chunk_size (int, optional): decoding chunk size. Defaults to -1.
                     <0: for decoding, use full chunk.
                     >0: for decoding, use fixed chunk size as set.
-                    0: used for training, it's prohibited here. 
-            num_decoding_left_chunks (int, optional): 
+                    0: used for training, it's prohibited here.
+            num_decoding_left_chunks (int, optional):
                     number of left chunks for decoding. Defaults to -1.
             simulate_streaming (bool, optional): simulate streaming inference. Defaults to False.
 
         Raises:
             ValueError: when not support decoding_method.
-        
+
         Returns:
             List[List[int]]: transcripts.
         """
@@ -601,7 +549,7 @@ class U2STModel(U2STBaseModel):
             ValueError: raise when using not support encoder type.
 
         Returns:
-            int, nn.Layer, nn.Layer, nn.Layer: vocab size, encoder, decoder, ctc 
+            int, nn.Layer, nn.Layer, nn.Layer: vocab size, encoder, decoder, ctc
         """
         if configs['cmvn_file'] is not None:
             mean, istd = load_cmvn(configs['cmvn_file'],