From 0ff57cec186593fbe5fe2b991d1b895dca651d5d Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 17 May 2021 03:18:51 +0000 Subject: [PATCH] default cmvn compute config; more log of grad clip; diff ds2 cmvn compute and conf; ds2 lr step by epoch; --- deepspeech/exps/deepspeech2/model.py | 2 -- deepspeech/training/gradclip.py | 14 ++++++++------ deepspeech/training/trainer.py | 3 +-- deepspeech/utils/layer_tools.py | 4 ++-- examples/aishell/s0/conf/deepspeech2.yaml | 8 ++++---- examples/aishell/s0/local/data.sh | 6 +++--- tests/mask_test.py | 18 ++++++++++-------- utils/compute_mean_std.py | 4 ++-- 8 files changed, 30 insertions(+), 29 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index c1fe82250..643936f17 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -43,13 +43,11 @@ class DeepSpeech2Trainer(Trainer): def train_batch(self, batch_index, batch_data, msg): start = time.time() - loss = self.model(*batch_data) loss.backward() layer_tools.print_grads(self.model, print_func=None) self.optimizer.step() self.optimizer.clear_grad() - iteration_time = time.time() - start losses_np = { diff --git a/deepspeech/training/gradclip.py b/deepspeech/training/gradclip.py index 6c106f340..912861f64 100644 --- a/deepspeech/training/gradclip.py +++ b/deepspeech/training/gradclip.py @@ -31,7 +31,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): def _dygraph_clip(self, params_grads): params_and_grads = [] sum_square_list = [] - for p, g in params_grads: + for i, (p, g) in enumerate(params_grads): if g is None: continue if getattr(p, 'need_clip', True) is False: @@ -45,7 +45,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): sum_square_list.append(sum_square) # debug log - # logger.debug(f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }") + if i < 10: + logger.debug(f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }") # all parameters have been filterd out if len(sum_square_list) == 0: @@ -62,7 +63,7 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): clip_var = layers.elementwise_div( x=max_global_norm, y=layers.elementwise_max(x=global_norm_var, y=max_global_norm)) - for p, g in params_grads: + for i, (p, g) in enumerate(params_grads): if g is None: continue if getattr(p, 'need_clip', True) is False: @@ -72,8 +73,9 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): params_and_grads.append((p, new_grad)) # debug log - # logger.debug( - # f"Grad After Clip: {p.name}: {float(merge_grad.square().sum().sqrt())}" - # ) + if i < 10: + logger.debug( + f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}" + ) return params_and_grads diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index e630febbc..56de32617 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -226,6 +226,7 @@ class Trainer(): 'lr': self.lr_scheduler()}, self.epoch) self.save(tag=self.epoch, infos={'val_loss': cv_loss}) + # step lr every epoch self.lr_scheduler.step() self.new_epoch() @@ -283,7 +284,6 @@ class Trainer(): """ # visualizer visualizer = SummaryWriter(logdir=str(self.output_dir)) - self.visualizer = visualizer @mp_tools.rank_zero_only @@ -301,7 +301,6 @@ class Trainer(): """ raise NotImplementedError("train_batch should be implemented.") - @mp_tools.rank_zero_only @paddle.no_grad() def valid(self): """The validation. A subclass should implement this method. 
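Note on the gradclip.py hunk above: the per-parameter debug logging around global-norm clipping is switched on, but only for the first 10 parameters, and trainer.py now steps the LR scheduler once per epoch. Below is a minimal NumPy sketch of global-norm clipping with that kind of capped per-parameter logging; the function and variable names are illustrative only, not the project's API.

import numpy as np

def clip_by_global_norm(named_grads, max_norm=3.0, log_first_n=10):
    """named_grads: list of (name, ndarray) gradient pairs."""
    # Global norm over all gradients: sqrt(sum_i ||g_i||^2).
    global_norm = np.sqrt(sum(float((g * g).sum()) for _, g in named_grads))
    # Scale everything down only when the global norm exceeds the cap.
    scale = min(1.0, max_norm / (global_norm + 1e-12))
    clipped = []
    for i, (name, g) in enumerate(named_grads):
        if i < log_first_n:  # mirrors the `if i < 10:` guard added in the patch
            print(f"Grad Before Clip: {name}: {np.linalg.norm(g):.6f}")
        g = g * scale
        if i < log_first_n:
            print(f"Grad After Clip: {name}: {np.linalg.norm(g):.6f}")
        clipped.append((name, g))
    return clipped, global_norm

# Example: the second call's gradients get scaled so the global norm is 3.0.
clip_by_global_norm([("w", np.ones((2, 3))), ("b", np.full(3, 5.0))])
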
diff --git a/deepspeech/utils/layer_tools.py b/deepspeech/utils/layer_tools.py index 67f3c9396..fb076c0c7 100644 --- a/deepspeech/utils/layer_tools.py +++ b/deepspeech/utils/layer_tools.py @@ -33,7 +33,7 @@ def summary(layer: nn.Layer, print_func=print): if print_func: num_elements = num_elements / 1024**2 print_func( - f"Total parameters: {num_params}, {num_elements:.2f} M elements.") + f"Total parameters: {num_params}, {num_elements:.2f}M elements.") def print_grads(model, print_func=print): @@ -57,7 +57,7 @@ def print_params(model, print_func=print): print_func(msg) if print_func: total = total / 1024**2 - print_func(f"Total parameters: {num_params}, {total:.2f} M elements.") + print_func(f"Total parameters: {num_params}, {total:.2f}M elements.") def gradient_norm(layer: nn.Layer): diff --git a/examples/aishell/s0/conf/deepspeech2.yaml b/examples/aishell/s0/conf/deepspeech2.yaml index 02c68df9c..8b08ee308 100644 --- a/examples/aishell/s0/conf/deepspeech2.yaml +++ b/examples/aishell/s0/conf/deepspeech2.yaml @@ -10,9 +10,9 @@ data: min_input_len: 0.0 max_input_len: 27.0 # second min_output_len: 0.0 - max_output_len: 400.0 - min_output_input_ratio: 0.05 - max_output_input_ratio: 10.0 + max_output_len: .inf + min_output_input_ratio: 0.00 + max_output_input_ratio: .inf specgram_type: linear target_sample_rate: 16000 max_freq: None @@ -41,7 +41,7 @@ training: lr: 2e-3 lr_decay: 0.83 weight_decay: 1e-06 - global_grad_clip: 5.0 + global_grad_clip: 3.0 log_interval: 100 decoding: diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh index f2a5dfc36..c92152c7c 100755 --- a/examples/aishell/s0/local/data.sh +++ b/examples/aishell/s0/local/data.sh @@ -32,7 +32,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --unit_type="char" \ --count_threshold=0 \ --vocab_path="data/vocab.txt" \ - --manifest_paths "data/manifest.train.raw" + --manifest_paths "data/manifest.train.raw" "data/manifest.dev.raw" if [ $? -ne 0 ]; then echo "Build vocabulary failed. Terminated." 
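Note on the deepspeech2.yaml hunk above: setting max_output_len and max_output_input_ratio to .inf (and min_output_input_ratio to 0.0) effectively disables those manifest filters, so only the input-length bound still rejects utterances. A rough sketch of how such thresholds are typically applied is below; the helper name and the exact ratio definition (output length divided by input duration) are assumptions for illustration, not the project's loader code.

def keep_utterance(duration_s, num_tokens,
                   min_input_len=0.0, max_input_len=27.0,
                   min_output_len=0.0, max_output_len=float('inf'),
                   min_output_input_ratio=0.0,
                   max_output_input_ratio=float('inf')):
    # Assumed ratio: transcript length per second of audio.
    ratio = num_tokens / max(duration_s, 1e-9)
    return (min_input_len <= duration_s <= max_input_len
            and min_output_len <= num_tokens <= max_output_len
            and min_output_input_ratio <= ratio <= max_output_input_ratio)

# Example: a 10 s utterance with 50 characters passes the relaxed bounds.
assert keep_utterance(10.0, 50)
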
@@ -51,8 +51,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --stride_ms=10.0 \ --window_ms=20.0 \ --sample_rate=16000 \ - --use_dB_normalization=False \ - --num_samples=-1 \ + --use_dB_normalization=True \ + --num_samples=2000 \ --num_workers=${num_workers} \ --output_path="data/mean_std.json" diff --git a/tests/mask_test.py b/tests/mask_test.py index c4a843e32..ce1a673a5 100644 --- a/tests/mask_test.py +++ b/tests/mask_test.py @@ -26,25 +26,27 @@ class TestU2Model(unittest.TestCase): paddle.set_device('cpu') self.lengths = paddle.to_tensor([5, 3, 2]) self.masks = np.array([ - [1, 1, 1, 1, 1], - [1, 1, 1, 0, 0], - [1, 1, 0, 0, 0], + [True, True, True, True, True], + [True, True, True, False, False], + [True, True, False, False, False], ]) self.pad_masks = np.array([ - [0, 0, 0, 0, 0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1], + [False, False, False, False, False], + [False, False, False, True, True], + [False, False, True, True, True], ]) def test_sequence_mask(self): - res = sequence_mask(self.lengths) + res = sequence_mask(self.lengths, dtype='bool') self.assertSequenceEqual(res.numpy().tolist(), self.masks.tolist()) def test_make_non_pad_mask(self): res = make_non_pad_mask(self.lengths) - res1 = sequence_mask(self.lengths) + res1 = sequence_mask(self.lengths, dtype='bool') + res2 = make_pad_mask(self.lengths).logical_not() self.assertSequenceEqual(res.numpy().tolist(), self.masks.tolist()) self.assertSequenceEqual(res.numpy().tolist(), res1.numpy().tolist()) + self.assertSequenceEqual(res.numpy().tolist(), res2.numpy().tolist()) def test_make_pad_mask(self): res = make_pad_mask(self.lengths) diff --git a/utils/compute_mean_std.py b/utils/compute_mean_std.py index 8dfd3e590..aff6f47c6 100644 --- a/utils/compute_mean_std.py +++ b/utils/compute_mean_std.py @@ -24,7 +24,7 @@ from deepspeech.utils.utility import print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('num_samples', int, -1, "# of samples to for statistics.") +add_arg('num_samples', int, 2000, "# of samples to for statistics.") add_arg('specgram_type', str, 'linear', @@ -35,7 +35,7 @@ add_arg('delta_delta', bool, False, "Audio feature with delta delta.") add_arg('stride_ms', float, 10.0, "stride length in ms.") add_arg('window_ms', float, 20.0, "stride length in ms.") add_arg('sample_rate', int, 16000, "target sample rate.") -add_arg('use_dB_normalization', bool, False, "do dB normalization.") +add_arg('use_dB_normalization', bool, True, "do dB normalization.") add_arg('target_dB', int, -20, "target dB.") add_arg('manifest_path', str,
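
Note on the tests/mask_test.py hunk above: the expected masks are now boolean, and make_pad_mask is additionally checked to be the logical NOT of make_non_pad_mask. A small NumPy stand-in that reproduces the same mask semantics is below; these helpers are illustrative, not the deepspeech implementations.

import numpy as np

def make_non_pad_mask(lengths, max_len=None):
    """True at real frames, False at padding (what sequence_mask returns as bool)."""
    lengths = np.asarray(lengths)
    max_len = max_len or int(lengths.max())
    return np.arange(max_len)[None, :] < lengths[:, None]

def make_pad_mask(lengths, max_len=None):
    """True at padding positions; the logical NOT of the non-pad mask."""
    return ~make_non_pad_mask(lengths, max_len)

masks = make_non_pad_mask([5, 3, 2])
assert masks.tolist() == [[True, True, True, True, True],
                          [True, True, True, False, False],
                          [True, True, False, False, False]]
assert (make_pad_mask([5, 3, 2]) == ~masks).all()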