add lr_decay

refactor code
5 years ago · 76290375b7
parent e80c741131
commit 76290375b7
14 changed files with 50 additions and 117 deletions
--- a/README.md
+++ b/README.md
@ -232,7 +232,7 @@ In order to inform the trainer of what augmentation components are needed and wh
 When the `--augment_conf_file` argument of `trainer.py` is set to the path of the above example configuration file, every audio clip in every epoch will be processed: with a 60% chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with an 80% chance it will be shifted in time with a randomly sampled offset between -5 ms and 5 ms. Finally, this newly synthesized audio clip will be fed into the feature extractor for further training.
-For other configuration examples, please refer to `conf/augmenatation.config.example`.
+For other configuration examples, please refer to `examples/conf/augmentation.config.example`.
 Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap.
--- a/README_cn.md
+++ b/README_cn.md
@ -232,7 +232,7 @@ python3 train.py --help
 当`trainer.py`的`--augment_conf_file`参数被设置为上述示例配置文件的路径时，每个 epoch 中的每个音频片段都将被处理。首先，均匀随机采样速率会有60％的概率在 0.95 和 1.05 之间对音频片段进行速度扰动。然后，音频片段有 80％ 的概率在时间上被挪移，挪移偏差值是 -5 毫秒和 5 毫秒之间的随机采样。最后，这个新合成的音频片段将被传送给特征提取器，以用于接下来的训练。
-有关其他配置实例，请参考`conf/augmenatation.config.example`.
+有关其他配置实例，请参考`examples/conf/augmentation.config.example`.
 使用数据增强技术时要小心，由于扩大了训练和测试集的差异，不恰当的增强会对训练模型不利，导致训练和预测的差距增大。
--- a/examples/aishell/conf/deepspeech2.yaml
+++ b/examples/aishell/conf/deepspeech2.yaml
@ -31,14 +31,15 @@ model:
 training:
  n_epoch: 20
  lr: 5e-4
  lr_decay: 1.0
  weight_decay: 1e-06
-  global_grad_clip: 400.0
+  global_grad_clip: 5.0
  max_iteration: 500000
  plot_interval: 1000
  save_interval: 1000
  valid_interval: 1000
 decoding:
-  batch_size: 10
+  batch_size: 128
  error_rate_type: cer 
  decoding_method: ctc_beam_search
  lang_model_path: models/lm/zh_giga.no_cna_cmn.prune01244.klm
--- a/examples/conf/augmentation.config
+++ b/examples/conf/augmentation.config
--- a/examples/conf/augmentation.config.example
+++ b/examples/conf/augmentation.config.example
--- a/examples/tiny/conf/deepspeech2.yaml
+++ b/examples/tiny/conf/deepspeech2.yaml
@ -31,8 +31,9 @@ model:
 training:
  n_epoch: 20
  lr: 1e-5 
  lr_decay: 1.0 
  weight_decay: 1e-06
-  global_grad_clip: 400.0
+  global_grad_clip: 5.0
  max_iteration: 500000
  plot_interval: 1000
  save_interval: 1000
--- a/examples/tiny/local/run_infer.sh
+++ b/examples/tiny/local/run_infer.sh
--- a/examples/tiny/local/train.sh
+++ b/examples/tiny/local/train.sh
@ -3,10 +3,10 @@
 export FLAGS_sync_nccl_allreduce=0
 #CUDA_VISIBLE_DEVICES=0,1,2,3 \
-CUDA_VISIBLE_DEVICES=0,1 \
+CUDA_VISIBLE_DEVICES=0 \
 python3 -u ${MAIN_ROOT}/train.py \
 --device 'gpu' \
--nproc 2 \
+--nproc 1 \
 --config conf/deepspeech2.yaml \
 --output ckpt
--- a/model_utils/config.py
+++ b/model_utils/config.py
@ -53,8 +53,9 @@ _C.model = CN(
 _C.training = CN(
    dict(
        lr=5e-4,  # learning rate
        lr_decay=1.0,  # learning rate decay
        weight_decay=1e-6,  # the coeff of weight decay
-        global_grad_clip=400.0,  # the global norm clip
+        global_grad_clip=5.0,  # the global norm clip
        plot_interval=1000,  # plot attention and spectrogram by step
        valid_interval=1000,  # validation by step
        save_interval=1000,  # checkpoint by step
--- a/model_utils/model.py
+++ b/model_utils/model.py
@ -250,25 +250,15 @@ class DeepSpeech2Trainer(Trainer):
        print_params(model, self.logger)
        grad_clip = MyClipGradByGlobalNorm(config.training.global_grad_clip)
        # optimizer = paddle.optimizer.Adam(
        #     learning_rate=config.training.lr,
        #     parameters=model.parameters(),
        #     weight_decay=paddle.regularizer.L2Decay(
        #         config.training.weight_decay),
        #     grad_clip=grad_clip)
        #learning_rate=fluid.layers.exponential_decay(
        #    learning_rate=learning_rate,
        #    decay_steps=num_samples / batch_size / dev_count,
        #    decay_rate=0.83,
        #    staircase=True),
        lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
-            learning_rate=config.training.lr, gamma=0.83, verbose=True)
+            learning_rate=config.training.lr,
            gamma=config.training.lr_decay,
            verbose=True)
        optimizer = paddle.optimizer.Adam(
            learning_rate=lr_scheduler,
            parameters=model.parameters(),
            weight_decay=paddle.regularizer.L2Decay(
                config.training.weight_decay),
            grad_clip=grad_clip)
        criterion = DeepSpeech2Loss(self.train_loader.dataset.vocab_size)
@ -458,22 +448,12 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
            output_dir = Path(self.args.output).expanduser() / "infer"
            output_dir.mkdir(parents=True, exist_ok=True)
        else:
-            output_dir = Path(self.args.checkpoint_path).expanduser().parent / "infer"
+            output_dir = Path(
                self.args.checkpoint_path).expanduser().parent / "infer"
            output_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir = output_dir
    # def setup_checkpointer(self):
    #     """Create a directory used to save checkpoints into.
    #     It is "checkpoints" inside the output directory.
    #     """
    #     # checkpoint dir
    #     checkpoint_dir = self.output_dir / "checkpoints"
    #     checkpoint_dir.mkdir(exist_ok=True)
    #     self.checkpoint_dir = checkpoint_dir
    def setup(self):
        """Setup the experiment.
        """
@ -506,7 +486,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
            num_rnn_layers=config.model.num_rnn_layers,
            rnn_size=config.model.rnn_layer_size,
            share_rnn_weights=config.model.share_rnn_weights)
-        
+
        if self.parallel:
            model = paddle.DataParallel(model)
--- a/tools/_init_paths.py
+++ b/tools/_init_paths.py
@ -1,29 +0,0 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Set up paths for DS2"""
 import os.path
 import sys
 def add_path(path):
    """Prepend ``path`` to ``sys.path`` unless it is already listed there."""
    if path in sys.path:
        # Already importable from this location; keep the existing order.
        return
    sys.path.insert(0, path)
 # Directory that contains this file.
 this_dir = os.path.dirname(__file__)
 # Add project path to the import search path: the parent directory of
 # this file is handed to add_path(), which puts it at the front of
 # sys.path if it is not already present.
 proj_path = os.path.join(this_dir, '..')
 add_path(proj_path)
--- a/training/trainer.py
+++ b/training/trainer.py
@ -14,6 +14,7 @@
 import time
 import logging
 import logging.handlers
 from pathlib import Path
 import numpy as np
 from collections import defaultdict
@ -249,7 +250,22 @@ class Trainer():
        Each process has its own text logger. The logging message is written to
        the standard output and a text file named ``worker_n.log`` in the
        output directory, where ``n`` means the rank of the process.
        when - how to split the log file by time interval
            'S' : Seconds
            'M' : Minutes
            'H' : Hours
            'D' : Days
            'W' : Week day
            default value: 'D'
        format - format of the log
            default format:
            %(levelname)s: %(asctime)s: %(filename)s:%(lineno)d * %(thread)d %(message)s
            INFO: 12-09 18:02:42: log.py:40 * 139814749787872 HELLO WORLD
        backup - how many backup files to keep
            default value: 7
        """
        when = 'D'
        backup = 7
        format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
        logger = logging.getLogger(__name__)
@ -270,6 +286,12 @@ class Trainer():
        # file_handler.setFormatter(formatter)
        # logger.addHandler(file_handler)
        handler = logging.handlers.TimedRotatingFileHandler(
            str(self.output_dir / "warning.log"), when=when, backupCount=backup)
        handler.setLevel(logging.WARNING)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # global logger
        stdout = False
        save_path = log_file
--- a/tools/tune.py
+++ b/tools/tune.py
@ -34,11 +34,9 @@ add_arg('num_batches',      int,    -1,     "# of batches tuning on. "
                                            "Default -1, on whole dev set.")
 add_arg('batch_size',       int,    256,    "# of samples per batch.")
 add_arg('trainer_count',    int,    8,      "# of Trainers (CPUs or GPUs).")
 add_arg('beam_size',        int,    500,    "Beam search width.")
 add_arg('num_proc_bsearch', int,    8,     "# of CPUs for beam search.")
 add_arg('num_conv_layers',  int,    2,      "# of convolution layers.")
 add_arg('num_rnn_layers',   int,    3,      "# of recurrent layers.")
 add_arg('rnn_layer_size',   int,    2048,   "# of recurrent cells per layer.")
 add_arg('num_alphas',       int,    45,     "# of alpha candidates for tuning.")
 add_arg('num_betas',        int,    8,      "# of beta candidates for tuning.")
 add_arg('alpha_from',       float,  1.0,    "Where alpha starts tuning from.")
@ -47,10 +45,15 @@ add_arg('beta_from',        float,  0.1,    "Where beta starts tuning from.")
 add_arg('beta_to',          float,  0.45,   "Where beta ends tuning with.")
 add_arg('cutoff_prob',      float,  1.0,    "Cutoff probability for pruning.")
 add_arg('cutoff_top_n',     int,    40,     "Cutoff number for pruning.")
 add_arg('num_conv_layers',  int,    2,      "# of convolution layers.")
 add_arg('num_rnn_layers',   int,    3,      "# of recurrent layers.")
 add_arg('rnn_layer_size',   int,    2048,   "# of recurrent cells per layer.")
 add_arg('use_gru',          bool,   False,  "Use GRUs instead of simple RNNs.")
 add_arg('use_gpu',          bool,   True,   "Use GPU or not.")
 add_arg('share_rnn_weights',bool,   True,   "Share input-hidden weights across "
                                            "bi-directional RNNs. Not for GRU.")
 add_arg('tune_manifest',    str,
        'data/librispeech/manifest.dev-clean',
        "Filepath of manifest to tune.")
@ -127,6 +130,8 @@ def tune():
    err_sum = [0.0 for i in range(len(params_grid))]
    err_ave = [0.0 for i in range(len(params_grid))]
    num_ins, len_refs, cur_batch = 0, 0, 0
    # initialize external scorer
    ds2_model.init_ext_scorer(args.alpha_from, args.beta_from,
@ -156,6 +161,7 @@ def tune():
            for target, result in zip(target_transcripts, result_transcripts):
                errors, len_ref = errors_func(target, result)
                err_sum[index] += errors
                # accumulate the length of references of every batch
                # in the first iteration
                if args.alpha_from == alpha and args.beta_from == beta:
--- a/utils/model_check.py
+++ b/utils/model_check.py
@ -1,49 +0,0 @@
 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import sys
 import paddle
 import paddle.fluid as fluid
 def check_cuda(
        use_cuda,
        err="\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n "
        "    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
 ):
    """
    Log error and exit when set use_gpu=true in paddlepaddle
    cpu version.

    Args:
        use_cuda: Whether the caller intends to run on GPU.
        err: Message printed before exiting. The default text (including
            its leading whitespace on the second line) is kept identical
            to the original message.

    Exits the process with status 1 when ``use_cuda`` is truthy and
    ``fluid.is_compiled_with_cuda()`` reports False. Otherwise returns
    None.
    """
    try:
        if use_cuda and not fluid.is_compiled_with_cuda():
            print(err)
            sys.exit(1)
    except Exception:
        # Deliberate best effort: if the check itself cannot be performed
        # (for example the query is unavailable), do not block the caller.
        # SystemExit is not an Exception subclass, so the exit above still
        # propagates.
        pass
 def check_version():
    """
    Log error and exit when the installed version of paddlepaddle is
    not satisfied.

    Calls ``fluid.require_version('2.0.0')``; if that call raises any
    exception, the error message is printed and the process exits with
    status 1.
    """
    # Parenthesised adjacent literals instead of backslash continuations:
    # a trailing backslash on the last fragment would glue the following
    # ``try:`` onto this statement and break parsing.
    err = ("PaddlePaddle version 2.0.0 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")
    try:
        fluid.require_version('2.0.0')
    except Exception:
        print(err)
        sys.exit(1)