From 926b1876c74a41e3c88c0c277f11c9bedaba0434 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Thu, 15 Apr 2021 05:04:54 +0000
Subject: [PATCH] fix logger

---
 deepspeech/__init__.py               | 25 ++++++++++++++--
 deepspeech/exps/deepspeech2/model.py |  2 +-
 deepspeech/exps/u2/model.py          | 10 +++----
 deepspeech/io/sampler.py             |  4 +--
 deepspeech/models/u2.py              | 10 +++++++
 deepspeech/training/gradclip.py      |  6 ++--
 deepspeech/training/scheduler.py     |  4 +--
 deepspeech/training/trainer.py       | 43 ++++------------------------
 deepspeech/utils/layer_tools.py      |  8 ++++--
 examples/tiny/s1/conf/conformer.yaml |  2 +-
 requirements.txt                     |  1 +
 11 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py
index 176ae4f96..a1617c78a 100644
--- a/deepspeech/__init__.py
+++ b/deepspeech/__init__.py
@@ -22,6 +22,27 @@ from paddle import nn
 from paddle.nn import functional as F
 #TODO(Hui Zhang): remove fluid import
 from paddle.fluid import core
+
+root = logging.getLogger()
+root.setLevel(logging.DEBUG)
+
+format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
+formatter = logging.Formatter(fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
+
+stream_handler = logging.StreamHandler()
+stream_handler.setLevel(logging.INFO)
+stream_handler.setFormatter(formatter)
+root.addHandler(stream_handler)
+save_path = 'global.log'
+file_handler = logging.FileHandler(save_path)
+file_handler.setLevel(logging.DEBUG)
+file_handler.setFormatter(formatter)
+root.addHandler(file_handler)
+
+# stop propagate for propagating may print
+# log multiple times
+root.propagate = False
+
 logger = logging.getLogger(__name__)
 
 ########### hcak logging #############
@@ -409,13 +430,13 @@ def ctc_loss(logits,
                                            input_lengths, label_lengths)
 
     loss_out = paddle.fluid.layers.squeeze(loss_out, [-1])
-    logger.info(f"warpctc loss: {loss_out}/{loss_out.shape} ")
+    logger.debug(f"warpctc loss: {loss_out}/{loss_out.shape} ")
     assert reduction in ['mean', 'sum', 'none']
     if reduction == 'mean':
         loss_out = paddle.mean(loss_out / label_lengths)
     elif reduction == 'sum':
         loss_out = paddle.sum(loss_out)
-    logger.info(f"ctc loss: {loss_out}")
+    logger.debug(f"ctc loss: {loss_out}")
     return loss_out
 
 
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index ebf479172..adc615321 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -45,7 +45,7 @@ class DeepSpeech2Trainer(Trainer):
     def __init__(self, config, args):
         super().__init__(config, args)
 
-    def train_batch(self, batch_data, msg):
+    def train_batch(self, batch_index, batch_data, msg):
         self.model.train()
 
         start = time.time()
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 87d0c94db..a020e997b 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -75,7 +75,7 @@ class U2Trainer(Trainer):
     def __init__(self, config, args):
         super().__init__(config, args)
 
-    def train_batch(self, batch_data, msg):
+    def train_batch(self, batch_index, batch_data, msg):
         train_conf = self.config.training
         self.model.train()
 
@@ -93,7 +93,7 @@ class U2Trainer(Trainer):
             'train_ctc_loss': float(ctc_loss),
         }
 
-        if (self.iteration + 1) % train_conf.accum_grad == 0:
+        if (batch_index + 1) % train_conf.accum_grad == 0:
             if dist.get_rank() == 0 and self.visualizer:
                 for k, v in losses_np.items():
                     self.visualizer.add_scalar("train/{}".format(k), v,
@@ -105,7 +105,7 @@ class U2Trainer(Trainer):
 
         iteration_time = time.time() - start
 
-        if (self.iteration + 1) % train_conf.log_interval == 0:
+        if (batch_index + 1) % train_conf.log_interval == 0:
             msg += "time: {:>.3f}s, ".format(iteration_time)
             msg += "batch size: {}, ".format(self.config.data.batch_size)
             msg += "accum: {}, ".format(train_conf.accum_grad)
@@ -136,14 +136,14 @@ class U2Trainer(Trainer):
         while self.epoch < self.config.training.n_epoch:
             try:
                 data_start_time = time.time()
-                for batch in self.train_loader:
+                for batch_index, batch in enumerate(self.train_loader):
                     dataload_time = time.time() - data_start_time
                     msg = "Train: Rank: {}, ".format(dist.get_rank())
                     msg += "epoch: {}, ".format(self.epoch)
                     msg += "step: {}, ".format(self.iteration)
                     msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                     msg += "dataloader time: {:>.3f}s, ".format(dataload_time)
-                    self.train_batch(batch, msg)
+                    self.train_batch(batch_index, batch, msg)
                     data_start_time = time.time()
             except Exception as e:
                 self.logger.error(e)
diff --git a/deepspeech/io/sampler.py b/deepspeech/io/sampler.py
index 2dbaf90cb..7acb88cf2 100644
--- a/deepspeech/io/sampler.py
+++ b/deepspeech/io/sampler.py
@@ -156,7 +156,7 @@ class SortagradDistributedBatchSampler(DistributedBatchSampler):
         for idx in _sample_iter:
             batch_indices.append(idx)
             if len(batch_indices) == self.batch_size:
-                logger.info(
+                logger.debug(
                     f"rank: {dist.get_rank()} batch index: {batch_indices} ")
                 yield batch_indices
                 batch_indices = []
@@ -236,7 +236,7 @@ class SortagradBatchSampler(BatchSampler):
         for idx in _sample_iter:
             batch_indices.append(idx)
             if len(batch_indices) == self.batch_size:
-                logger.info(
+                logger.debug(
                     f"rank: {dist.get_rank()} batch index: {batch_indices} ")
                 yield batch_indices
                 batch_indices = []
diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py
index 90f3e3227..a7b426356 100644
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -16,6 +16,7 @@ Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recogni
 (https://arxiv.org/pdf/2012.05481.pdf)
 """
 
+import time
 import sys
 from collections import defaultdict
 import logging
@@ -156,7 +157,10 @@ class U2BaseModel(nn.Module):
                 text_lengths.shape[0]), (speech.shape, speech_lengths.shape,
                                          text.shape, text_lengths.shape)
         # 1. Encoder
+        start = time.time()
         encoder_out, encoder_mask = self.encoder(speech, speech_lengths)
+        encoder_time = time.time() - start
+        logger.debug(f"encoder time: {encoder_time}")
         #TODO(Hui Zhang): sum not support bool type
         #encoder_out_lens = encoder_mask.squeeze(1).sum(1)  #[B, 1, T] -> [B]
         encoder_out_lens = encoder_mask.squeeze(1).cast(paddle.int64).sum(
@@ -165,14 +169,20 @@ class U2BaseModel(nn.Module):
         # 2a. Attention-decoder branch
         loss_att = None
         if self.ctc_weight != 1.0:
+            start = time.time()
             loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask,
                                                     text, text_lengths)
+            decoder_time = time.time() - start
+            logger.debug(f"decoder time: {decoder_time}")
 
         # 2b. CTC branch
         loss_ctc = None
         if self.ctc_weight != 0.0:
+            start = time.time()
             loss_ctc = self.ctc(encoder_out, encoder_out_lens, text,
                                 text_lengths)
+            ctc_time = time.time() - start
+            logger.debug(f"ctc time: {ctc_time}")
 
         if loss_ctc is None:
             loss = loss_att
diff --git a/deepspeech/training/gradclip.py b/deepspeech/training/gradclip.py
index 5a090796f..37fbb0f56 100644
--- a/deepspeech/training/gradclip.py
+++ b/deepspeech/training/gradclip.py
@@ -43,7 +43,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
                 merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
             square = layers.square(merge_grad)
             sum_square = layers.reduce_sum(square)
-            logger.info(
+            logger.debug(
                 f"Grad Before Clip: {p.name}: {float(layers.sqrt(layers.reduce_sum(layers.square(merge_grad))) ) }"
             )
             sum_square_list.append(sum_square)
@@ -55,7 +55,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
         global_norm_var = layers.concat(sum_square_list)
         global_norm_var = layers.reduce_sum(global_norm_var)
         global_norm_var = layers.sqrt(global_norm_var)
-        logger.info(f"Grad Global Norm: {float(global_norm_var)}!!!!")
+        logger.debug(f"Grad Global Norm: {float(global_norm_var)}!!!!")
         max_global_norm = layers.fill_constant(
             shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm)
         clip_var = layers.elementwise_div(
@@ -68,7 +68,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
                 params_and_grads.append((p, g))
                 continue
             new_grad = layers.elementwise_mul(x=g, y=clip_var)
-            logger.info(
+            logger.debug(
                 f"Grad After Clip: {p.name}: {float(layers.sqrt(layers.reduce_sum(layers.square(merge_grad))) ) }"
             )
             params_and_grads.append((p, new_grad))
diff --git a/deepspeech/training/scheduler.py b/deepspeech/training/scheduler.py
index 08e9d4121..5144ca5ae 100644
--- a/deepspeech/training/scheduler.py
+++ b/deepspeech/training/scheduler.py
@@ -18,10 +18,10 @@ from typeguard import check_argument_types
 
 from paddle.optimizer.lr import LRScheduler
 
-logger = logging.getLogger(__name__)
-
 __all__ = ["WarmupLR"]
 
+logger = logging.getLogger(__name__)
+
 
 class WarmupLR(LRScheduler):
     """The WarmupLR scheduler
diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py
index 0f18e3ba0..a2d0ee489 100644
--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -14,7 +14,6 @@
 
 import time
 import logging
-import logging.handlers
 from pathlib import Path
 
 import paddle
@@ -26,6 +25,8 @@ from deepspeech.utils import mp_tools
 
 __all__ = ["Trainer"]
 
+logger = logging.getLogger(__name__)
+
 
 class Trainer():
     """
@@ -186,14 +187,14 @@ class Trainer():
         while self.epoch < self.config.training.n_epoch:
             try:
                 data_start_time = time.time()
-                for batch in self.train_loader:
+                for batch_index, batch in enumerate(self.train_loader):
                     dataload_time = time.time() - data_start_time
                     msg = "Train: Rank: {}, ".format(dist.get_rank())
                     msg += "epoch: {}, ".format(self.epoch)
                     msg += "step: {}, ".format(self.iteration)
                     msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                     msg += "dataloader time: {:>.3f}s, ".format(dataload_time)
-                    self.train_batch(batch, msg)
+                    self.train_batch(batch_index, batch, msg)
                     data_start_time = time.time()
             except Exception as e:
                 self.logger.error(e)
@@ -215,6 +216,7 @@ class Trainer():
                 exit(-1)
             finally:
                 self.destory()
+        self.logger.info("Training Done.")
 
     def setup_output_dir(self):
         """Create a directory used for output.
@@ -279,41 +281,6 @@ class Trainer():
         backup - how many backup file to keep
                  default value: 7
         """
-        when = 'D'
-        backup = 7
-        format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
-        formatter = logging.Formatter(fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
-
-        logger = logging.getLogger(__name__)
-        logger.setLevel("INFO")
-
-        stream_handler = logging.StreamHandler()
-        stream_handler.setFormatter(formatter)
-        logger.addHandler(stream_handler)
-
-        log_file = self.output_dir / 'worker_{}.log'.format(dist.get_rank())
-        # file_handler = logging.FileHandler(str(log_file))
-        # file_handler.setFormatter(formatter)
-        # logger.addHandler(file_handler)
-
-        # handler = logging.handlers.TimedRotatingFileHandler(
-        #     str(self.output_dir / "warning.log"), when=when, backupCount=backup)
-        # handler.setLevel(logging.WARNING)
-        # handler.setFormatter(formatter)
-        # logger.addHandler(handler)
-
-        # stop propagate for propagating may print
-        # log multiple times
-        logger.propagate = False
-
-        # global logger
-        stdout = False
-        save_path = str(log_file)
-        logging.basicConfig(
-            level=logging.DEBUG if stdout else logging.INFO,
-            format=format,
-            datefmt='%Y/%m/%d %H:%M:%S',
-            filename=save_path if not stdout else None)
         self.logger = logger
 
     @mp_tools.rank_zero_only
diff --git a/deepspeech/utils/layer_tools.py b/deepspeech/utils/layer_tools.py
index 9232c7fa5..0138ff1a9 100644
--- a/deepspeech/utils/layer_tools.py
+++ b/deepspeech/utils/layer_tools.py
@@ -67,13 +67,17 @@ def unfreeze(layer: nn.Layer):
 
 
 def print_grads(model, print_func=print):
+    if print_func is None:
+        return
     for n, p in model.named_parameters():
         msg = f"param grad: {n}: shape: {p.shape} grad: {p.grad}"
-        if print_func:
-            print_func(msg)
+        print_func(msg)
 
 
 def print_params(model, print_func=print):
+    if print_func is None:
+        return
+
     total = 0.0
     for n, p in model.named_parameters():
         msg = f"param: {n}: shape: {p.shape} stop_grad: {p.stop_gradient}"
diff --git a/examples/tiny/s1/conf/conformer.yaml b/examples/tiny/s1/conf/conformer.yaml
index 0e58002af..09f877470 100644
--- a/examples/tiny/s1/conf/conformer.yaml
+++ b/examples/tiny/s1/conf/conformer.yaml
@@ -124,7 +124,7 @@ training:
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
-  log_interval: 100
+  log_interval: 1
 
 
 decoding:
diff --git a/requirements.txt b/requirements.txt
index 2d021a4b7..8e004cfae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ python_speech_features
 resampy==0.2.2
 scipy==1.2.1
 sentencepiece
+snakeviz
 SoundFile==0.9.0.post1
 sox
 tensorboardX
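
Note (not part of the patch): below is a minimal, self-contained sketch of the logging behaviour this change sets up in deepspeech/__init__.py. The root logger gets a console handler at INFO and a file handler at DEBUG writing to global.log, and module loggers created with logging.getLogger(__name__) propagate their records to those root handlers, so the per-batch messages demoted to logger.debug() above land only in the log file. The module name used below is just an example, and the sketch omits the patch's root.propagate flag for brevity.

import logging

root = logging.getLogger()
root.setLevel(logging.DEBUG)

fmt = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter = logging.Formatter(fmt=fmt, datefmt='%Y/%m/%d %H:%M:%S')

console = logging.StreamHandler()            # INFO and above reach the console
console.setLevel(logging.INFO)
console.setFormatter(formatter)
root.addHandler(console)

logfile = logging.FileHandler('global.log')  # everything, including DEBUG, goes to the file
logfile.setLevel(logging.DEBUG)
logfile.setFormatter(formatter)
root.addHandler(logfile)

# A module-level logger has no handlers of its own; its records propagate
# up to the root handlers configured above.
logger = logging.getLogger('deepspeech.training.gradclip')
logger.info("shown on the console and written to global.log")
logger.debug("written only to global.log")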