default cmvn compute config; more grad clip logging; update ds2 cmvn compute script and config; step ds2 lr by epoch

pull/621/head
Hui Zhang 4 years ago
parent 7af055631b
commit 0ff57cec18

@@ -43,13 +43,11 @@ class DeepSpeech2Trainer(Trainer):
    def train_batch(self, batch_index, batch_data, msg):
        start = time.time()
        loss = self.model(*batch_data)
        loss.backward()
        layer_tools.print_grads(self.model, print_func=None)
        self.optimizer.step()
        self.optimizer.clear_grad()
        iteration_time = time.time() - start
        losses_np = {

@@ -31,7 +31,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
    def _dygraph_clip(self, params_grads):
        params_and_grads = []
        sum_square_list = []
        for p, g in params_grads:
        for i, (p, g) in enumerate(params_grads):
            if g is None:
                continue
            if getattr(p, 'need_clip', True) is False:
@@ -45,7 +45,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
            sum_square_list.append(sum_square)
            # debug log
            # logger.debug(f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }")
            if i < 10:
                logger.debug(f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }")
        # all parameters have been filterd out
        if len(sum_square_list) == 0:
@@ -62,7 +63,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
        clip_var = layers.elementwise_div(
            x=max_global_norm,
            y=layers.elementwise_max(x=global_norm_var, y=max_global_norm))
        for p, g in params_grads:
        for i, (p, g) in enumerate(params_grads):
            if g is None:
                continue
            if getattr(p, 'need_clip', True) is False:
@@ -72,8 +73,9 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
            params_and_grads.append((p, new_grad))
            # debug log
            # logger.debug(
            #     f"Grad After Clip: {p.name}: {float(merge_grad.square().sum().sqrt())}"
            # )
            if i < 10:
                logger.debug(
                    f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}"
                )
        return params_and_grads
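The hunks above replace the commented-out debug statements with logging of the first ten parameters' gradient norms before and after clipping. A minimal, self-contained sketch of the same global-norm clipping idea (plain NumPy, not the Paddle ClipGradByGlobalNorm internals; all names here are illustrative):

```python
import logging
import math

import numpy as np

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("clip_sketch")


def clip_grads_by_global_norm(named_grads, clip_norm=3.0, log_first_n=10):
    """Scale every gradient by clip_norm / max(global_norm, clip_norm)."""
    # global_norm = sqrt(sum over all params of ||g||^2)
    global_norm = math.sqrt(sum(float((g ** 2).sum()) for _, g in named_grads))
    scale = clip_norm / max(global_norm, clip_norm)

    clipped = []
    for i, (name, g) in enumerate(named_grads):
        if i < log_first_n:  # only log the first few, as in the patch
            logger.debug(f"Grad Before Clip: {name}: {np.linalg.norm(g)}")
        g = g * scale
        if i < log_first_n:
            logger.debug(f"Grad After Clip: {name}: {np.linalg.norm(g)}")
        clipped.append((name, g))
    return clipped


grads = [("w", np.array([3.0, 4.0])), ("b", np.array([12.0]))]
# global_norm = sqrt(3^2 + 4^2 + 12^2) = 13, so each grad shrinks by 3/13
print(clip_grads_by_global_norm(grads, clip_norm=3.0))
```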

@@ -226,6 +226,7 @@ class Trainer():
                    'lr': self.lr_scheduler()}, self.epoch)
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            # step lr every epoch
            self.lr_scheduler.step()
            self.new_epoch()
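With this change the learning-rate scheduler is stepped once per epoch rather than per batch. Using the lr: 2e-3 and lr_decay: 0.83 values from the config further down, the effect is roughly the following (a sketch using Paddle's ExponentialDecay scheduler; the optimizer wiring is illustrative, not the Trainer's actual code, and assumes PaddlePaddle 2.x):

```python
import paddle

base_lr, lr_decay, n_epoch = 2e-3, 0.83, 5

scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=base_lr, gamma=lr_decay)
optimizer = paddle.optimizer.Adam(learning_rate=scheduler,
                                  parameters=paddle.nn.Linear(4, 4).parameters())

for epoch in range(n_epoch):
    # ... run all training batches, calling optimizer.step() for each batch ...
    scheduler.step()  # decay once per epoch: lr = base_lr * lr_decay ** (epoch + 1)
    print(f"epoch {epoch}: next lr = {scheduler.get_lr():.6f}")
```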
@@ -283,7 +284,6 @@ class Trainer():
        """
        # visualizer
        visualizer = SummaryWriter(logdir=str(self.output_dir))
        self.visualizer = visualizer
    @mp_tools.rank_zero_only
@@ -301,7 +301,6 @@ class Trainer():
        """
        raise NotImplementedError("train_batch should be implemented.")
    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def valid(self):
        """The validation. A subclass should implement this method.

@@ -10,9 +10,9 @@ data:
  min_input_len: 0.0
  max_input_len: 27.0 # second
  min_output_len: 0.0
  max_output_len: 400.0
  min_output_input_ratio: 0.05
  max_output_input_ratio: 10.0
  max_output_len: .inf
  min_output_input_ratio: 0.00
  max_output_input_ratio: .inf
  specgram_type: linear
  target_sample_rate: 16000
  max_freq: None
@@ -41,7 +41,7 @@ training:
  lr: 2e-3
  lr_decay: 0.83
  weight_decay: 1e-06
  global_grad_clip: 5.0
  global_grad_clip: 3.0
  log_interval: 100
decoding:
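The new max_output_len, min_output_input_ratio, and max_output_input_ratio values effectively disable utterance filtering, since YAML's .inf literal parses to a float infinity and every comparison against it passes. A small sketch of that behaviour (PyYAML; the keep() filter is illustrative, not the project's actual manifest loader):

```python
import yaml

cfg = yaml.safe_load("""
min_output_len: 0.0
max_output_len: .inf
min_output_input_ratio: 0.00
max_output_input_ratio: .inf
""")

def keep(utt_seconds, num_tokens, cfg):
    """Return True if an utterance passes the (now effectively disabled) length filters."""
    ratio = num_tokens / utt_seconds
    return (cfg["min_output_len"] <= num_tokens <= cfg["max_output_len"]
            and cfg["min_output_input_ratio"] <= ratio <= cfg["max_output_input_ratio"])

print(cfg["max_output_len"])   # inf
print(keep(12.5, 300, cfg))    # True: nothing is filtered out any more
```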

@@ -32,7 +32,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    --unit_type="char" \
    --count_threshold=0 \
    --vocab_path="data/vocab.txt" \
    --manifest_paths "data/manifest.train.raw"
    --manifest_paths "data/manifest.train.raw" "data/manifest.dev.raw"
    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
@@ -51,8 +51,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    --stride_ms=10.0 \
    --window_ms=20.0 \
    --sample_rate=16000 \
    --use_dB_normalization=False \
    --num_samples=-1 \
    --use_dB_normalization=True \
    --num_samples=2000 \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"
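These flags now compute the normalization statistics from a 2000-sample subset with dB normalization enabled, matching the new compute_mean_std.py defaults shown further down. Conceptually, CMVN only needs a per-dimension mean and standard deviation over sampled feature frames; a rough sketch of that computation (NumPy only, with a made-up featurizer standing in for the real one, and JSON field names that are illustrative rather than the file's real schema):

```python
import json

import numpy as np

rng = np.random.default_rng(0)

def extract_features(wav_path, feat_dim=161):
    """Stand-in for the real linear-spectrogram featurizer: (num_frames, feat_dim)."""
    return rng.standard_normal((int(rng.integers(100, 300)), feat_dim))

def compute_mean_std(manifest, num_samples=2000, output_path="mean_std.json"):
    # Sample a subset of utterances (num_samples <= 0 means "use everything").
    sampled = manifest[:num_samples] if num_samples > 0 else manifest
    frames = np.concatenate([extract_features(p) for p in sampled], axis=0)
    stats = {
        "mean_stat": frames.mean(axis=0).tolist(),
        "std_stat": (frames.std(axis=0) + 1e-20).tolist(),
        "frame_num": int(frames.shape[0]),
    }
    with open(output_path, "w") as f:
        json.dump(stats, f)

compute_mean_std([f"utt_{i}.wav" for i in range(10)], num_samples=5)
```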

@@ -26,25 +26,27 @@ class TestU2Model(unittest.TestCase):
        paddle.set_device('cpu')
        self.lengths = paddle.to_tensor([5, 3, 2])
        self.masks = np.array([
            [1, 1, 1, 1, 1],
            [1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0],
            [True, True, True, True, True],
            [True, True, True, False, False],
            [True, True, False, False, False],
        ])
        self.pad_masks = np.array([
            [0, 0, 0, 0, 0],
            [0, 0, 0, 1, 1],
            [0, 0, 1, 1, 1],
            [False, False, False, False, False],
            [False, False, False, True, True],
            [False, False, True, True, True],
        ])
    def test_sequence_mask(self):
        res = sequence_mask(self.lengths)
        res = sequence_mask(self.lengths, dtype='bool')
        self.assertSequenceEqual(res.numpy().tolist(), self.masks.tolist())
    def test_make_non_pad_mask(self):
        res = make_non_pad_mask(self.lengths)
        res1 = sequence_mask(self.lengths)
        res1 = sequence_mask(self.lengths, dtype='bool')
        res2 = make_pad_mask(self.lengths).logical_not()
        self.assertSequenceEqual(res.numpy().tolist(), self.masks.tolist())
        self.assertSequenceEqual(res.numpy().tolist(), res1.numpy().tolist())
        self.assertSequenceEqual(res.numpy().tolist(), res2.numpy().tolist())
    def test_make_pad_mask(self):
        res = make_pad_mask(self.lengths)
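The updated tests expect boolean masks, with sequence_mask now called as dtype='bool'. For lengths [5, 3, 2], the non-pad mask marks valid positions True and the pad mask is its logical negation; a NumPy sketch of that relationship (the function names mirror the tested API, but this is not the project's implementation):

```python
import numpy as np

def make_non_pad_mask(lengths, max_len=None):
    """mask[b, t] is True for t < lengths[b] (valid frames)."""
    lengths = np.asarray(lengths)
    max_len = max_len or int(lengths.max())
    return np.arange(max_len)[None, :] < lengths[:, None]

def make_pad_mask(lengths, max_len=None):
    """True where the frame is padding; the logical negation of the non-pad mask."""
    return ~make_non_pad_mask(lengths, max_len)

lengths = [5, 3, 2]
print(make_non_pad_mask(lengths).tolist())
# [[True, True, True, True, True],
#  [True, True, True, False, False],
#  [True, True, False, False, False]]
print(make_pad_mask(lengths).tolist())
# [[False, False, False, False, False],
#  [False, False, False, True, True],
#  [False, False, True, True, True]]
```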

@@ -24,7 +24,7 @@ from deepspeech.utils.utility import print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('num_samples', int, -1, "# of samples to for statistics.")
add_arg('num_samples', int, 2000, "# of samples to for statistics.")
add_arg('specgram_type', str,
        'linear',
@@ -35,7 +35,7 @@ add_arg('delta_delta', bool, False, "Audio feature with delta delta.")
add_arg('stride_ms', float, 10.0, "stride length in ms.")
add_arg('window_ms', float, 20.0, "stride length in ms.")
add_arg('sample_rate', int, 16000, "target sample rate.")
add_arg('use_dB_normalization', bool, False, "do dB normalization.")
add_arg('use_dB_normalization', bool, True, "do dB normalization.")
add_arg('target_dB', int, -20, "target dB.")
add_arg('manifest_path', str,
