diff --git a/README.md b/README.md
index 70065f2b3..3bf8d8963 100644
--- a/README.md
+++ b/README.md
@@ -232,7 +232,7 @@ In order to inform the trainer of what augmentation components are needed and wh

 When the `--augment_conf_file` argument of `trainer.py` is set to the path of the above example configuration file, every audio clip in every epoch will be processed: with 60% of chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with 80% of chance it will be shifted in time with a random sampled offset between -5 ms and 5 ms. Finally this newly synthesized audio clip will be feed into the feature extractor for further training.

-For other configuration examples, please refer to `conf/augmenatation.config.example`.
+For other configuration examples, please refer to `examples/conf/augmentation.config.example`.

 Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap.
diff --git a/README_cn.md b/README_cn.md
index 4ca6dda32..e12886517 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -232,7 +232,7 @@ python3 train.py --help

 当`trainer.py`的`--augment_conf_file`参数被设置为上述示例配置文件的路径时,每个 epoch 中的每个音频片段都将被处理。首先,均匀随机采样速率会有60%的概率在 0.95 和 1.05 之间对音频片段进行速度扰动。然后,音频片段有 80% 的概率在时间上被挪移,挪移偏差值是 -5 毫秒和 5 毫秒之间的随机采样。最后,这个新合成的音频片段将被传送给特征提取器,以用于接下来的训练。

-有关其他配置实例,请参考`conf/augmenatation.config.example`.
+有关其他配置实例,请参考`examples/conf/augmentation.config.example`.

 使用数据增强技术时要小心,由于扩大了训练和测试集的差异,不恰当的增强会对训练模型不利,导致训练和预测的差距增大。
diff --git a/examples/aishell/conf/deepspeech2.yaml b/examples/aishell/conf/deepspeech2.yaml
index 8bbdfa262..56109ac46 100644
--- a/examples/aishell/conf/deepspeech2.yaml
+++ b/examples/aishell/conf/deepspeech2.yaml
@@ -31,14 +31,15 @@ model:
 training:
   n_epoch: 20
   lr: 5e-4
+  lr_decay: 1.0
   weight_decay: 1e-06
-  global_grad_clip: 400.0
+  global_grad_clip: 5.0
   max_iteration: 500000
   plot_interval: 1000
   save_interval: 1000
   valid_interval: 1000
 decoding:
-  batch_size: 10
+  batch_size: 128
   error_rate_type: cer
   decoding_method: ctc_beam_search
   lang_model_path: models/lm/zh_giga.no_cna_cmn.prune01244.klm
diff --git a/conf/augmentation.config b/examples/conf/augmentation.config
similarity index 100%
rename from conf/augmentation.config
rename to examples/conf/augmentation.config
diff --git a/conf/augmentation.config.example b/examples/conf/augmentation.config.example
similarity index 100%
rename from conf/augmentation.config.example
rename to examples/conf/augmentation.config.example
diff --git a/examples/tiny/conf/deepspeech2.yaml b/examples/tiny/conf/deepspeech2.yaml
index 457a56b2e..ab4cb510a 100644
--- a/examples/tiny/conf/deepspeech2.yaml
+++ b/examples/tiny/conf/deepspeech2.yaml
@@ -31,8 +31,9 @@ model:
 training:
   n_epoch: 20
   lr: 1e-5
+  lr_decay: 1.0
   weight_decay: 1e-06
-  global_grad_clip: 400.0
+  global_grad_clip: 5.0
   max_iteration: 500000
   plot_interval: 1000
   save_interval: 1000
diff --git a/examples/tiny/local/run_infer.sh b/examples/tiny/local/infer.sh
similarity index 100%
rename from examples/tiny/local/run_infer.sh
rename to examples/tiny/local/infer.sh
diff --git a/examples/tiny/local/train.sh b/examples/tiny/local/train.sh
index 8899d2fd1..dfd229172 100644
--- a/examples/tiny/local/train.sh
+++ b/examples/tiny/local/train.sh
@@ -3,10 +3,10 @@
 export FLAGS_sync_nccl_allreduce=0

 #CUDA_VISIBLE_DEVICES=0,1,2,3 \
-CUDA_VISIBLE_DEVICES=0,1 \
+CUDA_VISIBLE_DEVICES=0 \
 python3 -u ${MAIN_ROOT}/train.py \
 --device 'gpu' \
---nproc 2 \
+--nproc 1 \
 --config conf/deepspeech2.yaml \
 --output ckpt
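Note on the README hunk at the top of this patch: the augmentation pipeline it describes is driven by a small JSON list of augmentor entries in the (renamed) `examples/conf/augmentation.config.example`. The sketch below builds such a config for the exact recipe quoted in the README, 60% chance of speed perturbation in [0.95, 1.05] followed by an 80% chance of a time shift in [-5 ms, 5 ms]. The `type`/`params`/`prob` field names follow the usual DeepSpeech augmentor schema and should be verified against the example file before use.

```python
import json

# Hypothetical augmentation config matching the README description.
# Field names are assumptions; check examples/conf/augmentation.config.example.
augmentation_conf = [
    {
        "type": "speed",
        "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05},
        "prob": 0.6,  # 60% of clips get speed-perturbed
    },
    {
        "type": "shift",
        "params": {"min_shift_ms": -5, "max_shift_ms": 5},
        "prob": 0.8,  # 80% of clips get time-shifted
    },
]

with open("my_augmentation.config", "w") as f:
    json.dump(augmentation_conf, f, indent=4)
```

A file like this would then be passed to the trainer via `--augment_conf_file`, as described in the README.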
diff --git a/model_utils/config.py b/model_utils/config.py
index f4b876045..79436110f 100644
--- a/model_utils/config.py
+++ b/model_utils/config.py
@@ -53,8 +53,9 @@ _C.model = CN(
 _C.training = CN(
     dict(
         lr=5e-4,  # learning rate
+        lr_decay=1.0,  # learning rate decay
         weight_decay=1e-6,  # the coeff of weight decay
-        global_grad_clip=400.0,  # the global norm clip
+        global_grad_clip=5.0,  # the global norm clip
         plot_interval=1000,  # plot attention and spectrogram by step
         valid_interval=1000,  # validation by step
         save_interval=1000,  # checkpoint by step
diff --git a/model_utils/model.py b/model_utils/model.py
index d4106f344..f38de6db7 100644
--- a/model_utils/model.py
+++ b/model_utils/model.py
@@ -250,25 +250,15 @@ class DeepSpeech2Trainer(Trainer):
         print_params(model, self.logger)

         grad_clip = MyClipGradByGlobalNorm(config.training.global_grad_clip)
-
-        # optimizer = paddle.optimizer.Adam(
-        #     learning_rate=config.training.lr,
-        #     parameters=model.parameters(),
-        #     weight_decay=paddle.regularizer.L2Decay(
-        #         config.training.weight_decay),
-        #     grad_clip=grad_clip)
-
-        #learning_rate=fluid.layers.exponential_decay(
-        #    learning_rate=learning_rate,
-        #    decay_steps=num_samples / batch_size / dev_count,
-        #    decay_rate=0.83,
-        #    staircase=True),
-
         lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
-            learning_rate=config.training.lr, gamma=0.83, verbose=True)
+            learning_rate=config.training.lr,
+            gamma=config.training.lr_decay,
+            verbose=True)
         optimizer = paddle.optimizer.Adam(
             learning_rate=lr_scheduler,
             parameters=model.parameters(),
+            weight_decay=paddle.regularizer.L2Decay(
+                config.training.weight_decay),
             grad_clip=grad_clip)

         criterion = DeepSpeech2Loss(self.train_loader.dataset.vocab_size)
@@ -458,22 +448,12 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             output_dir = Path(self.args.output).expanduser() / "infer"
             output_dir.mkdir(parents=True, exist_ok=True)
         else:
-            output_dir = Path(self.args.checkpoint_path).expanduser().parent / "infer"
+            output_dir = Path(
+                self.args.checkpoint_path).expanduser().parent / "infer"
             output_dir.mkdir(parents=True, exist_ok=True)

         self.output_dir = output_dir

-    # def setup_checkpointer(self):
-    #     """Create a directory used to save checkpoints into.
-
-    #     It is "checkpoints" inside the output directory.
-    #     """
-    #     # checkpoint dir
-    #     checkpoint_dir = self.output_dir / "checkpoints"
-    #     checkpoint_dir.mkdir(exist_ok=True)
-
-    #     self.checkpoint_dir = checkpoint_dir
-
     def setup(self):
         """Setup the experiment.
         """
@@ -506,7 +486,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             num_rnn_layers=config.model.num_rnn_layers,
             rnn_size=config.model.rnn_layer_size,
             share_rnn_weights=config.model.share_rnn_weights)
-
+
         if self.parallel:
             model = paddle.DataParallel(model)
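To make the optimizer rewiring in `model_utils/model.py` above easier to follow, here is a minimal, self-contained sketch of the same pattern: an `ExponentialDecay` scheduler whose `gamma` comes from the new `training.lr_decay` option (so the default of 1.0 keeps the learning rate constant), fed into Adam together with L2 weight decay and global-norm gradient clipping. The toy model and literal values are placeholders, and Paddle's built-in `paddle.nn.ClipGradByGlobalNorm` stands in for the repo's custom `MyClipGradByGlobalNorm`.

```python
import paddle

# Toy stand-in for the DeepSpeech2 model; values mirror the new config defaults.
model = paddle.nn.Linear(10, 10)
lr, lr_decay, weight_decay, global_grad_clip = 5e-4, 1.0, 1e-6, 5.0

# gamma=1.0 means "no decay": the learning rate stays at its initial value.
lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
    learning_rate=lr, gamma=lr_decay, verbose=True)

optimizer = paddle.optimizer.Adam(
    learning_rate=lr_scheduler,
    parameters=model.parameters(),
    weight_decay=paddle.regularizer.L2Decay(weight_decay),
    grad_clip=paddle.nn.ClipGradByGlobalNorm(global_grad_clip))

# Typical usage: optimizer.step() / optimizer.clear_grad() every batch,
# lr_scheduler.step() once per epoch (or per step, depending on the schedule).
```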
diff --git a/tools/_init_paths.py b/tools/_init_paths.py
deleted file mode 100644
index c4b28c643..000000000
--- a/tools/_init_paths.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Set up paths for DS2"""
-
-import os.path
-import sys
-
-
-def add_path(path):
-    if path not in sys.path:
-        sys.path.insert(0, path)
-
-
-this_dir = os.path.dirname(__file__)
-
-# Add project path to PYTHONPATH
-proj_path = os.path.join(this_dir, '..')
-add_path(proj_path)
diff --git a/training/trainer.py b/training/trainer.py
index a64924c97..1dcca5aab 100644
--- a/training/trainer.py
+++ b/training/trainer.py
@@ -14,6 +14,7 @@
 import time
 import logging
+import logging.handlers
 from pathlib import Path
 import numpy as np
 from collections import defaultdict
@@ -249,7 +250,22 @@ class Trainer():
         Each process has its own text logger. The logging message is write to
         the standard output and a text file named ``worker_n.log`` in the
         output directory, where ``n`` means the rank of the process.
+        when - how to split the log file by time interval
+            'S' : Seconds
+            'M' : Minutes
+            'H' : Hours
+            'D' : Days
+            'W' : Week day
+            default value: 'D'
+        format - format of the log
+            default format:
+                %(levelname)s: %(asctime)s: %(filename)s:%(lineno)d * %(thread)d %(message)s
+                INFO: 12-09 18:02:42: log.py:40 * 139814749787872 HELLO WORLD
+        backup - how many backup file to keep
+            default value: 7
         """
+        when = 'D'
+        backup = 7
         format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
         logger = logging.getLogger(__name__)
@@ -270,6 +286,12 @@ class Trainer():
         # file_handler.setFormatter(formatter)
         # logger.addHandler(file_handler)

+        handler = logging.handlers.TimedRotatingFileHandler(
+            str(self.output_dir / "warning.log"), when=when, backupCount=backup)
+        handler.setLevel(logging.WARNING)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
         # global logger
         stdout = False
         save_path = log_file
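As a standalone illustration of the `TimedRotatingFileHandler` wiring added to `training/trainer.py` above (daily rotation via `when='D'`, 7 backups, WARNING level, same message format), here is a minimal sketch using only the standard library; the log path and messages are placeholders.

```python
import logging
import logging.handlers

fmt = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter = logging.Formatter(fmt=fmt)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Rotate warning.log once a day ('D'), keeping at most 7 rotated files.
handler = logging.handlers.TimedRotatingFileHandler(
    "warning.log", when='D', backupCount=7)
handler.setLevel(logging.WARNING)      # only WARNING and above reach this file
handler.setFormatter(formatter)
logger.addHandler(handler)

logger.warning("disk almost full")     # goes to warning.log (rotated daily)
logger.info("just a status message")   # below WARNING, skipped by this handler
```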
Not for GRU.") + add_arg('tune_manifest', str, 'data/librispeech/manifest.dev-clean', "Filepath of manifest to tune.") @@ -127,6 +130,8 @@ def tune(): err_sum = [0.0 for i in range(len(params_grid))] err_ave = [0.0 for i in range(len(params_grid))] + + num_ins, len_refs, cur_batch = 0, 0, 0 # initialize external scorer ds2_model.init_ext_scorer(args.alpha_from, args.beta_from, @@ -156,6 +161,7 @@ def tune(): for target, result in zip(target_transcripts, result_transcripts): errors, len_ref = errors_func(target, result) err_sum[index] += errors + # accumulate the length of references of every batch # in the first iteration if args.alpha_from == alpha and args.beta_from == beta: diff --git a/utils/model_check.py b/utils/model_check.py deleted file mode 100644 index e69c02ba3..000000000 --- a/utils/model_check.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle -import paddle.fluid as fluid - - -def check_cuda(use_cuda, err = \ - "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ - Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" - ): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. - """ - try: - if use_cuda == True and fluid.is_compiled_with_cuda() == False: - print(err) - sys.exit(1) - except Exception as e: - pass - - -def check_version(): - """ - Log error and exit when the installed version of paddlepaddle is - not satisfied. - """ - err = "PaddlePaddle version 2.0.0 or higher is required, " \ - "or a suitable develop version is satisfied as well. \n" \ - "Please make sure the version is good with your code." \ - - try: - fluid.require_version('2.0.0') - except Exception as e: - print(err) - sys.exit(1)