diff --git a/README.md b/README.md
index 70065f2b3..3bf8d8963 100644
--- a/README.md
+++ b/README.md
@@ -232,7 +232,7 @@ In order to inform the trainer of what augmentation components are needed and wh

 When the `--augment_conf_file` argument of `trainer.py` is set to the path of the above example configuration file, every audio clip in every epoch will be processed: with 60% of chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with 80% of chance it will be shifted in time with a random sampled offset between -5 ms and 5 ms. Finally this newly synthesized audio clip will be feed into the feature extractor for further training.

-For other configuration examples, please refer to `conf/augmenatation.config.example`.
+For other configuration examples, please refer to `examples/conf/augmentation.config.example`.

 Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap.
diff --git a/README_cn.md b/README_cn.md
index 4ca6dda32..e12886517 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -232,7 +232,7 @@ python3 train.py --help

 当`trainer.py`的`--augment_conf_file`参数被设置为上述示例配置文件的路径时,每个 epoch 中的每个音频片段都将被处理。首先,均匀随机采样速率会有60%的概率在 0.95 和 1.05 之间对音频片段进行速度扰动。然后,音频片段有 80% 的概率在时间上被挪移,挪移偏差值是 -5 毫秒和 5 毫秒之间的随机采样。最后,这个新合成的音频片段将被传送给特征提取器,以用于接下来的训练。

-有关其他配置实例,请参考`conf/augmenatation.config.example`.
+有关其他配置实例,请参考`examples/conf/augmentation.config.example`.

 使用数据增强技术时要小心,由于扩大了训练和测试集的差异,不恰当的增强会对训练模型不利,导致训练和预测的差距增大。
diff --git a/examples/aishell/conf/deepspeech2.yaml b/examples/aishell/conf/deepspeech2.yaml
index 8bbdfa262..56109ac46 100644
--- a/examples/aishell/conf/deepspeech2.yaml
+++ b/examples/aishell/conf/deepspeech2.yaml
@@ -31,14 +31,15 @@ model:
 training:
   n_epoch: 20
   lr: 5e-4
+  lr_decay: 1.0
   weight_decay: 1e-06
-  global_grad_clip: 400.0
+  global_grad_clip: 5.0
   max_iteration: 500000
   plot_interval: 1000
   save_interval: 1000
   valid_interval: 1000
 decoding:
-  batch_size: 10
+  batch_size: 128
   error_rate_type: cer
   decoding_method: ctc_beam_search
   lang_model_path: models/lm/zh_giga.no_cna_cmn.prune01244.klm
diff --git a/conf/augmentation.config b/examples/conf/augmentation.config
similarity index 100%
rename from conf/augmentation.config
rename to examples/conf/augmentation.config
diff --git a/conf/augmentation.config.example b/examples/conf/augmentation.config.example
similarity index 100%
rename from conf/augmentation.config.example
rename to examples/conf/augmentation.config.example
diff --git a/examples/tiny/conf/deepspeech2.yaml b/examples/tiny/conf/deepspeech2.yaml
index 457a56b2e..ab4cb510a 100644
--- a/examples/tiny/conf/deepspeech2.yaml
+++ b/examples/tiny/conf/deepspeech2.yaml
@@ -31,8 +31,9 @@ model:
 training:
   n_epoch: 20
   lr: 1e-5
+  lr_decay: 1.0
   weight_decay: 1e-06
-  global_grad_clip: 400.0
+  global_grad_clip: 5.0
   max_iteration: 500000
   plot_interval: 1000
   save_interval: 1000
diff --git a/examples/tiny/local/run_infer.sh b/examples/tiny/local/infer.sh
similarity index 100%
rename from examples/tiny/local/run_infer.sh
rename to examples/tiny/local/infer.sh
diff --git a/examples/tiny/local/train.sh b/examples/tiny/local/train.sh
index 8899d2fd1..dfd229172 100644
--- a/examples/tiny/local/train.sh
+++ b/examples/tiny/local/train.sh
@@ -3,10 +3,10 @@
 export FLAGS_sync_nccl_allreduce=0

 #CUDA_VISIBLE_DEVICES=0,1,2,3 \
-CUDA_VISIBLE_DEVICES=0,1 \
+CUDA_VISIBLE_DEVICES=0 \
 python3 -u ${MAIN_ROOT}/train.py \
 --device 'gpu' \
---nproc 2 \
+--nproc 1 \
 --config conf/deepspeech2.yaml \
 --output ckpt
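Note on the README hunk at the top of this patch: the augmentation pipeline it describes is driven by a small JSON list of augmentor entries in the (renamed) `examples/conf/augmentation.config.example`. The sketch below builds such a config for the exact recipe quoted in the README, 60% chance of speed perturbation in [0.95, 1.05] followed by an 80% chance of a time shift in [-5 ms, 5 ms]. The `type`/`params`/`prob` field names follow the usual DeepSpeech augmentor schema and should be verified against the example file before use.

```python
import json

# Hypothetical augmentation config matching the README description.
# Field names are assumptions; check examples/conf/augmentation.config.example.
augmentation_conf = [
    {
        "type": "speed",
        "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05},
        "prob": 0.6,  # 60% of clips get speed-perturbed
    },
    {
        "type": "shift",
        "params": {"min_shift_ms": -5, "max_shift_ms": 5},
        "prob": 0.8,  # 80% of clips get time-shifted
    },
]

with open("my_augmentation.config", "w") as f:
    json.dump(augmentation_conf, f, indent=4)
```

A file like this would then be passed to the trainer via `--augment_conf_file`, as described in the README.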
diff --git a/model_utils/config.py b/model_utils/config.py
index f4b876045..79436110f 100644
--- a/model_utils/config.py
+++ b/model_utils/config.py
@@ -53,8 +53,9 @@ _C.model = CN(
 _C.training = CN(
     dict(
         lr=5e-4,  # learning rate
+        lr_decay=1.0,  # learning rate decay
         weight_decay=1e-6,  # the coeff of weight decay
-        global_grad_clip=400.0,  # the global norm clip
+        global_grad_clip=5.0,  # the global norm clip
         plot_interval=1000,  # plot attention and spectrogram by step
         valid_interval=1000,  # validation by step
         save_interval=1000,  # checkpoint by step
diff --git a/model_utils/model.py b/model_utils/model.py
index d4106f344..f38de6db7 100644
--- a/model_utils/model.py
+++ b/model_utils/model.py
@@ -250,25 +250,15 @@ class DeepSpeech2Trainer(Trainer):
         print_params(model, self.logger)

         grad_clip = MyClipGradByGlobalNorm(config.training.global_grad_clip)
-
-        # optimizer = paddle.optimizer.Adam(
-        #     learning_rate=config.training.lr,
-        #     parameters=model.parameters(),
-        #     weight_decay=paddle.regularizer.L2Decay(
-        #         config.training.weight_decay),
-        #     grad_clip=grad_clip)
-
-        #learning_rate=fluid.layers.exponential_decay(
-        #    learning_rate=learning_rate,
-        #    decay_steps=num_samples / batch_size / dev_count,
-        #    decay_rate=0.83,
-        #    staircase=True),
-
         lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
-            learning_rate=config.training.lr, gamma=0.83, verbose=True)
+            learning_rate=config.training.lr,
+            gamma=config.training.lr_decay,
+            verbose=True)
         optimizer = paddle.optimizer.Adam(
             learning_rate=lr_scheduler,
             parameters=model.parameters(),
+            weight_decay=paddle.regularizer.L2Decay(
+                config.training.weight_decay),
             grad_clip=grad_clip)

         criterion = DeepSpeech2Loss(self.train_loader.dataset.vocab_size)
@@ -458,22 +448,12 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             output_dir = Path(self.args.output).expanduser() / "infer"
             output_dir.mkdir(parents=True, exist_ok=True)
         else:
-            output_dir = Path(self.args.checkpoint_path).expanduser().parent / "infer"
+            output_dir = Path(
+                self.args.checkpoint_path).expanduser().parent / "infer"
             output_dir.mkdir(parents=True, exist_ok=True)

         self.output_dir = output_dir

-    # def setup_checkpointer(self):
-    #     """Create a directory used to save checkpoints into.
-
-    #     It is "checkpoints" inside the output directory.
-    #     """
-    #     # checkpoint dir
-    #     checkpoint_dir = self.output_dir / "checkpoints"
-    #     checkpoint_dir.mkdir(exist_ok=True)
-
-    #     self.checkpoint_dir = checkpoint_dir
-
     def setup(self):
         """Setup the experiment.
         """
@@ -506,7 +486,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             num_rnn_layers=config.model.num_rnn_layers,
             rnn_size=config.model.rnn_layer_size,
             share_rnn_weights=config.model.share_rnn_weights)
-
+
         if self.parallel:
             model = paddle.DataParallel(model)
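To make the optimizer rewiring in `model_utils/model.py` above easier to follow, here is a minimal, self-contained sketch of the same pattern: an `ExponentialDecay` scheduler whose `gamma` comes from the new `training.lr_decay` option (so the default of 1.0 keeps the learning rate constant), fed into Adam together with L2 weight decay and global-norm gradient clipping. The toy model and literal values are placeholders, and Paddle's built-in `paddle.nn.ClipGradByGlobalNorm` stands in for the repo's custom `MyClipGradByGlobalNorm`.

```python
import paddle

# Toy stand-in for the DeepSpeech2 model; values mirror the new config defaults.
model = paddle.nn.Linear(10, 10)
lr, lr_decay, weight_decay, global_grad_clip = 5e-4, 1.0, 1e-6, 5.0

# gamma=1.0 means "no decay": the learning rate stays at its initial value.
lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
    learning_rate=lr, gamma=lr_decay, verbose=True)

optimizer = paddle.optimizer.Adam(
    learning_rate=lr_scheduler,
    parameters=model.parameters(),
    weight_decay=paddle.regularizer.L2Decay(weight_decay),
    grad_clip=paddle.nn.ClipGradByGlobalNorm(global_grad_clip))

# Typical usage: optimizer.step() / optimizer.clear_grad() every batch,
# lr_scheduler.step() once per epoch (or per step, depending on the schedule).
```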
diff --git a/tools/_init_paths.py b/tools/_init_paths.py
deleted file mode 100644
index c4b28c643..000000000
--- a/tools/_init_paths.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Set up paths for DS2"""
-
-import os.path
-import sys
-
-
-def add_path(path):
-    if path not in sys.path:
-        sys.path.insert(0, path)
-
-
-this_dir = os.path.dirname(__file__)
-
-# Add project path to PYTHONPATH
-proj_path = os.path.join(this_dir, '..')
-add_path(proj_path)
diff --git a/training/trainer.py b/training/trainer.py
index a64924c97..1dcca5aab 100644
--- a/training/trainer.py
+++ b/training/trainer.py
@@ -14,6 +14,7 @@
 import time
 import logging
+import logging.handlers
 from pathlib import Path
 import numpy as np
 from collections import defaultdict
@@ -249,7 +250,22 @@ class Trainer():
         Each process has its own text logger. The logging message is write to
         the standard output and a text file named ``worker_n.log`` in the
         output directory, where ``n`` means the rank of the process.
+        when - how to split the log file by time interval
+            'S' : Seconds
+            'M' : Minutes
+            'H' : Hours
+            'D' : Days
+            'W' : Week day
+            default value: 'D'
+        format - format of the log
+            default format:
+                %(levelname)s: %(asctime)s: %(filename)s:%(lineno)d * %(thread)d %(message)s
+                INFO: 12-09 18:02:42: log.py:40 * 139814749787872 HELLO WORLD
+        backup - how many backup file to keep
+            default value: 7
         """
+        when = 'D'
+        backup = 7
         format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
         logger = logging.getLogger(__name__)
@@ -270,6 +286,12 @@ class Trainer():
         # file_handler.setFormatter(formatter)
         # logger.addHandler(file_handler)

+        handler = logging.handlers.TimedRotatingFileHandler(
+            str(self.output_dir / "warning.log"), when=when, backupCount=backup)
+        handler.setLevel(logging.WARNING)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
         # global logger
         stdout = False
         save_path = log_file
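As a standalone illustration of the `TimedRotatingFileHandler` wiring added to `training/trainer.py` above (daily rotation via `when='D'`, 7 backups, WARNING level, same message format), here is a minimal sketch using only the standard library; the log path and messages are placeholders.

```python
import logging
import logging.handlers

fmt = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter = logging.Formatter(fmt=fmt)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Rotate warning.log once a day ('D'), keeping at most 7 rotated files.
handler = logging.handlers.TimedRotatingFileHandler(
    "warning.log", when='D', backupCount=7)
handler.setLevel(logging.WARNING)      # only WARNING and above reach this file
handler.setFormatter(formatter)
logger.addHandler(handler)

logger.warning("disk almost full")     # goes to warning.log (rotated daily)
logger.info("just a status message")   # below WARNING, skipped by this handler
```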
Not for GRU.") + add_arg('tune_manifest', str, 'data/librispeech/manifest.dev-clean', "Filepath of manifest to tune.") @@ -127,6 +130,8 @@ def tune(): err_sum = [0.0 for i in range(len(params_grid))] err_ave = [0.0 for i in range(len(params_grid))] + + num_ins, len_refs, cur_batch = 0, 0, 0 # initialize external scorer ds2_model.init_ext_scorer(args.alpha_from, args.beta_from, @@ -156,6 +161,7 @@ def tune(): for target, result in zip(target_transcripts, result_transcripts): errors, len_ref = errors_func(target, result) err_sum[index] += errors + # accumulate the length of references of every batch # in the first iteration if args.alpha_from == alpha and args.beta_from == beta: diff --git a/utils/model_check.py b/utils/model_check.py deleted file mode 100644 index e69c02ba3..000000000 --- a/utils/model_check.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle -import paddle.fluid as fluid - - -def check_cuda(use_cuda, err = \ - "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ - Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" - ): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. - """ - try: - if use_cuda == True and fluid.is_compiled_with_cuda() == False: - print(err) - sys.exit(1) - except Exception as e: - pass - - -def check_version(): - """ - Log error and exit when the installed version of paddlepaddle is - not satisfied. - """ - err = "PaddlePaddle version 2.0.0 or higher is required, " \ - "or a suitable develop version is satisfied as well. \n" \ - "Please make sure the version is good with your code." \ - - try: - fluid.require_version('2.0.0') - except Exception as e: - print(err) - sys.exit(1)