pull/578/head
Hui Zhang 5 years ago
parent 5ea181b7ab
commit 926b1876c7

@ -22,6 +22,27 @@ from paddle import nn
from paddle.nn import functional as F
#TODO(Hui Zhang): remove fluid import
from paddle.fluid import core
root = logging.getLogger()
root.setLevel(logging.DEBUG)
format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter = logging.Formatter(fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
stream_handler.setFormatter(formatter)
root.addHandler(stream_handler)
save_path = 'global.log'
file_handler = logging.FileHandler(save_path)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)
root.addHandler(file_handler)
# stop propagate for propagating may print
# log multiple times
root.propagate = False
logger = logging.getLogger(__name__)
########### hcak logging #############
@ -409,13 +430,13 @@ def ctc_loss(logits,
input_lengths, label_lengths)
loss_out = paddle.fluid.layers.squeeze(loss_out, [-1])
logger.info(f"warpctc loss: {loss_out}/{loss_out.shape} ")
logger.debug(f"warpctc loss: {loss_out}/{loss_out.shape} ")
assert reduction in ['mean', 'sum', 'none']
if reduction == 'mean':
loss_out = paddle.mean(loss_out / label_lengths)
elif reduction == 'sum':
loss_out = paddle.sum(loss_out)
logger.info(f"ctc loss: {loss_out}")
logger.debug(f"ctc loss: {loss_out}")
return loss_out

@ -45,7 +45,7 @@ class DeepSpeech2Trainer(Trainer):
def __init__(self, config, args):
super().__init__(config, args)
def train_batch(self, batch_data, msg):
def train_batch(self, batch_index, batch_data, msg):
self.model.train()
start = time.time()

@ -75,7 +75,7 @@ class U2Trainer(Trainer):
def __init__(self, config, args):
super().__init__(config, args)
def train_batch(self, batch_data, msg):
def train_batch(self, batch_index, batch_data, msg):
train_conf = self.config.training
self.model.train()
@ -93,7 +93,7 @@ class U2Trainer(Trainer):
'train_ctc_loss': float(ctc_loss),
}
if (self.iteration + 1) % train_conf.accum_grad == 0:
if (batch_index + 1) % train_conf.accum_grad == 0:
if dist.get_rank() == 0 and self.visualizer:
for k, v in losses_np.items():
self.visualizer.add_scalar("train/{}".format(k), v,
@ -105,7 +105,7 @@ class U2Trainer(Trainer):
iteration_time = time.time() - start
if (self.iteration + 1) % train_conf.log_interval == 0:
if (batch_index + 1) % train_conf.log_interval == 0:
msg += "time: {:>.3f}s, ".format(iteration_time)
msg += "batch size: {}, ".format(self.config.data.batch_size)
msg += "accum: {}, ".format(train_conf.accum_grad)
@ -136,14 +136,14 @@ class U2Trainer(Trainer):
while self.epoch < self.config.training.n_epoch:
try:
data_start_time = time.time()
for batch in self.train_loader:
for batch_index, batch in enumerate(self.train_loader):
dataload_time = time.time() - data_start_time
msg = "Train: Rank: {}, ".format(dist.get_rank())
msg += "epoch: {}, ".format(self.epoch)
msg += "step: {}, ".format(self.iteration)
msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
msg += "dataloader time: {:>.3f}s, ".format(dataload_time)
self.train_batch(batch, msg)
self.train_batch(batch_index, batch, msg)
data_start_time = time.time()
except Exception as e:
self.logger.error(e)

@ -156,7 +156,7 @@ class SortagradDistributedBatchSampler(DistributedBatchSampler):
for idx in _sample_iter:
batch_indices.append(idx)
if len(batch_indices) == self.batch_size:
logger.info(
logger.debug(
f"rank: {dist.get_rank()} batch index: {batch_indices} ")
yield batch_indices
batch_indices = []
@ -236,7 +236,7 @@ class SortagradBatchSampler(BatchSampler):
for idx in _sample_iter:
batch_indices.append(idx)
if len(batch_indices) == self.batch_size:
logger.info(
logger.debug(
f"rank: {dist.get_rank()} batch index: {batch_indices} ")
yield batch_indices
batch_indices = []

@ -16,6 +16,7 @@ Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recogni
(https://arxiv.org/pdf/2012.05481.pdf)
"""
import time
import sys
from collections import defaultdict
import logging
@ -156,7 +157,10 @@ class U2BaseModel(nn.Module):
text_lengths.shape[0]), (speech.shape, speech_lengths.shape,
text.shape, text_lengths.shape)
# 1. Encoder
start = time.time()
encoder_out, encoder_mask = self.encoder(speech, speech_lengths)
encoder_time = time.time() - start
logger.debug(f"encoder time: {encoder_time}")
#TODO(Hui Zhang): sum not support bool type
#encoder_out_lens = encoder_mask.squeeze(1).sum(1) #[B, 1, T] -> [B]
encoder_out_lens = encoder_mask.squeeze(1).cast(paddle.int64).sum(
@ -165,14 +169,20 @@ class U2BaseModel(nn.Module):
# 2a. Attention-decoder branch
loss_att = None
if self.ctc_weight != 1.0:
start = time.time()
loss_att, acc_att = self._calc_att_loss(encoder_out, encoder_mask,
text, text_lengths)
decoder_time = time.time() - start
logger.debug(f"decoder time: {decoder_time}")
# 2b. CTC branch
loss_ctc = None
if self.ctc_weight != 0.0:
start = time.time()
loss_ctc = self.ctc(encoder_out, encoder_out_lens, text,
text_lengths)
ctc_time = time.time() - start
logger.debug(f"ctc time: {ctc_time}")
if loss_ctc is None:
loss = loss_att

@ -43,7 +43,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
square = layers.square(merge_grad)
sum_square = layers.reduce_sum(square)
logger.info(
logger.debug(
f"Grad Before Clip: {p.name}: {float(layers.sqrt(layers.reduce_sum(layers.square(merge_grad))) ) }"
)
sum_square_list.append(sum_square)
@ -55,7 +55,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
global_norm_var = layers.concat(sum_square_list)
global_norm_var = layers.reduce_sum(global_norm_var)
global_norm_var = layers.sqrt(global_norm_var)
logger.info(f"Grad Global Norm: {float(global_norm_var)}!!!!")
logger.debug(f"Grad Global Norm: {float(global_norm_var)}!!!!")
max_global_norm = layers.fill_constant(
shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm)
clip_var = layers.elementwise_div(
@ -68,7 +68,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
params_and_grads.append((p, g))
continue
new_grad = layers.elementwise_mul(x=g, y=clip_var)
logger.info(
logger.debug(
f"Grad After Clip: {p.name}: {float(layers.sqrt(layers.reduce_sum(layers.square(merge_grad))) ) }"
)
params_and_grads.append((p, new_grad))

@ -18,10 +18,10 @@ from typeguard import check_argument_types
from paddle.optimizer.lr import LRScheduler
logger = logging.getLogger(__name__)
__all__ = ["WarmupLR"]
logger = logging.getLogger(__name__)
class WarmupLR(LRScheduler):
"""The WarmupLR scheduler

@ -14,7 +14,6 @@
import time
import logging
import logging.handlers
from pathlib import Path
import paddle
@ -26,6 +25,8 @@ from deepspeech.utils import mp_tools
__all__ = ["Trainer"]
logger = logging.getLogger(__name__)
class Trainer():
"""
@ -186,14 +187,14 @@ class Trainer():
while self.epoch < self.config.training.n_epoch:
try:
data_start_time = time.time()
for batch in self.train_loader:
for batch_index, batch in enumerate(self.train_loader):
dataload_time = time.time() - data_start_time
msg = "Train: Rank: {}, ".format(dist.get_rank())
msg += "epoch: {}, ".format(self.epoch)
msg += "step: {}, ".format(self.iteration)
msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
msg += "dataloader time: {:>.3f}s, ".format(dataload_time)
self.train_batch(batch, msg)
self.train_batch(batch_index, batch, msg)
data_start_time = time.time()
except Exception as e:
self.logger.error(e)
@ -215,6 +216,7 @@ class Trainer():
exit(-1)
finally:
self.destory()
self.logger.info("Training Done.")
def setup_output_dir(self):
"""Create a directory used for output.
@ -279,41 +281,6 @@ class Trainer():
backup - how many backup file to keep
default value: 7
"""
when = 'D'
backup = 7
format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter = logging.Formatter(fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
logger = logging.getLogger(__name__)
logger.setLevel("INFO")
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
log_file = self.output_dir / 'worker_{}.log'.format(dist.get_rank())
# file_handler = logging.FileHandler(str(log_file))
# file_handler.setFormatter(formatter)
# logger.addHandler(file_handler)
# handler = logging.handlers.TimedRotatingFileHandler(
# str(self.output_dir / "warning.log"), when=when, backupCount=backup)
# handler.setLevel(logging.WARNING)
# handler.setFormatter(formatter)
# logger.addHandler(handler)
# stop propagate for propagating may print
# log multiple times
logger.propagate = False
# global logger
stdout = False
save_path = str(log_file)
logging.basicConfig(
level=logging.DEBUG if stdout else logging.INFO,
format=format,
datefmt='%Y/%m/%d %H:%M:%S',
filename=save_path if not stdout else None)
self.logger = logger
@mp_tools.rank_zero_only

@ -67,13 +67,17 @@ def unfreeze(layer: nn.Layer):
def print_grads(model, print_func=print):
if print_func is None:
return
for n, p in model.named_parameters():
msg = f"param grad: {n}: shape: {p.shape} grad: {p.grad}"
if print_func:
print_func(msg)
def print_params(model, print_func=print):
if print_func is None:
return
total = 0.0
for n, p in model.named_parameters():
msg = f"param: {n}: shape: {p.shape} stop_grad: {p.stop_gradient}"

@ -124,7 +124,7 @@ training:
scheduler_conf:
warmup_steps: 25000
lr_decay: 1.0
log_interval: 100
log_interval: 1
decoding:

@ -4,6 +4,7 @@ python_speech_features
resampy==0.2.2
scipy==1.2.1
sentencepiece
snakeviz
SoundFile==0.9.0.post1
sox
tensorboardX

Loading…
Cancel
Save