From b1c80c45e01777701ab1d1f91b41cb9e58835c5b Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 10 Dec 2021 09:32:27 +0000 Subject: [PATCH] remove ctc grad norm type in config --- examples/aishell/asr0/conf/deepspeech2_online.yaml | 2 +- examples/librispeech/asr0/conf/deepspeech2.yaml | 2 +- examples/librispeech/asr0/conf/deepspeech2_online.yaml | 2 +- examples/librispeech/asr1/conf/chunk_conformer.yaml | 2 -- examples/librispeech/asr1/conf/chunk_transformer.yaml | 2 -- examples/librispeech/asr1/conf/conformer.yaml | 2 -- examples/librispeech/asr1/conf/transformer.yaml | 4 +--- examples/librispeech/asr2/conf/transformer.yaml | 2 -- examples/ted_en_zh/st0/conf/transformer.yaml | 2 -- examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml | 2 -- examples/ted_en_zh/st1/conf/transformer.yaml | 2 -- examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml | 2 -- examples/timit/asr1/conf/transformer.yaml | 2 -- examples/tiny/asr0/conf/deepspeech2.yaml | 2 +- examples/tiny/asr0/conf/deepspeech2_online.yaml | 2 +- examples/tiny/asr1/conf/chunk_confermer.yaml | 2 -- examples/tiny/asr1/conf/chunk_transformer.yaml | 2 -- examples/tiny/asr1/conf/conformer.yaml | 2 -- examples/tiny/asr1/conf/transformer.yaml | 2 -- examples/wenetspeech/asr1/conf/conformer.yaml | 2 -- paddlespeech/s2t/models/ds2/deepspeech2.py | 10 ++++------ paddlespeech/s2t/models/ds2_online/deepspeech2.py | 8 ++++---- paddlespeech/s2t/models/u2/u2.py | 6 ++++-- paddlespeech/s2t/models/u2_st/u2_st.py | 6 ++++-- tests/unit/asr/u2_model_test.py | 4 ---- 25 files changed, 22 insertions(+), 54 deletions(-) diff --git a/examples/aishell/asr0/conf/deepspeech2_online.yaml b/examples/aishell/asr0/conf/deepspeech2_online.yaml index 010d8f15..2f63f4de 100644 --- a/examples/aishell/asr0/conf/deepspeech2_online.yaml +++ b/examples/aishell/asr0/conf/deepspeech2_online.yaml @@ -43,7 +43,7 @@ model: fc_layers_size_list: -1, use_gru: False blank_id: 0 - ctc_grad_norm_type: null + training: n_epoch: 65 diff --git 
a/examples/librispeech/asr0/conf/deepspeech2.yaml b/examples/librispeech/asr0/conf/deepspeech2.yaml index 70fa3fcb..f3574e15 100644 --- a/examples/librispeech/asr0/conf/deepspeech2.yaml +++ b/examples/librispeech/asr0/conf/deepspeech2.yaml @@ -41,7 +41,7 @@ model: use_gru: False share_rnn_weights: True blank_id: 0 - ctc_grad_norm_type: null + training: n_epoch: 50 diff --git a/examples/librispeech/asr0/conf/deepspeech2_online.yaml b/examples/librispeech/asr0/conf/deepspeech2_online.yaml index 3e07862d..0d16bc57 100644 --- a/examples/librispeech/asr0/conf/deepspeech2_online.yaml +++ b/examples/librispeech/asr0/conf/deepspeech2_online.yaml @@ -43,7 +43,7 @@ model: fc_layers_size_list: 512, 256 use_gru: False blank_id: 0 - ctc_grad_norm_type: null + training: n_epoch: 50 diff --git a/examples/librispeech/asr1/conf/chunk_conformer.yaml b/examples/librispeech/asr1/conf/chunk_conformer.yaml index 54580664..7f593037 100644 --- a/examples/librispeech/asr1/conf/chunk_conformer.yaml +++ b/examples/librispeech/asr1/conf/chunk_conformer.yaml @@ -76,8 +76,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/asr1/conf/chunk_transformer.yaml b/examples/librispeech/asr1/conf/chunk_transformer.yaml index 70a9dc6a..366d6de0 100644 --- a/examples/librispeech/asr1/conf/chunk_transformer.yaml +++ b/examples/librispeech/asr1/conf/chunk_transformer.yaml @@ -69,8 +69,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/asr1/conf/conformer.yaml b/examples/librispeech/asr1/conf/conformer.yaml index ca934eb1..f02f24dc 100644 --- a/examples/librispeech/asr1/conf/conformer.yaml +++ b/examples/librispeech/asr1/conf/conformer.yaml @@ -72,8 +72,6 @@ model: # hybrid 
CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/asr1/conf/transformer.yaml b/examples/librispeech/asr1/conf/transformer.yaml index 0cc0dae6..a90efe48 100644 --- a/examples/librispeech/asr1/conf/transformer.yaml +++ b/examples/librispeech/asr1/conf/transformer.yaml @@ -29,8 +29,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false @@ -81,7 +79,7 @@ training: optim_conf: lr: 0.004 weight_decay: 1e-06 - scheduler: warmuplr # pytorch v1.1.0+ required + scheduler: warmuplr scheduler_conf: warmup_steps: 25000 lr_decay: 1.0 diff --git a/examples/librispeech/asr2/conf/transformer.yaml b/examples/librispeech/asr2/conf/transformer.yaml index 00240743..a16563a5 100644 --- a/examples/librispeech/asr2/conf/transformer.yaml +++ b/examples/librispeech/asr2/conf/transformer.yaml @@ -30,8 +30,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/ted_en_zh/st0/conf/transformer.yaml b/examples/ted_en_zh/st0/conf/transformer.yaml index 6ed75be4..36f287b1 100644 --- a/examples/ted_en_zh/st0/conf/transformer.yaml +++ b/examples/ted_en_zh/st0/conf/transformer.yaml @@ -68,8 +68,6 @@ model: model_conf: asr_weight: 0.0 ctc_weight: 0.0 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml b/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml index 7e886cca..78887d3c 100644 --- a/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml +++ b/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml @@ -68,8 +68,6 @@ model: model_conf: 
asr_weight: 0.5 ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/ted_en_zh/st1/conf/transformer.yaml b/examples/ted_en_zh/st1/conf/transformer.yaml index 3bef7bc5..609c5824 100644 --- a/examples/ted_en_zh/st1/conf/transformer.yaml +++ b/examples/ted_en_zh/st1/conf/transformer.yaml @@ -68,8 +68,6 @@ model: model_conf: asr_weight: 0.0 ctc_weight: 0.0 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml b/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml index 3175aad9..10eccd1e 100644 --- a/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml +++ b/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml @@ -68,8 +68,6 @@ model: model_conf: asr_weight: 0.5 ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/timit/asr1/conf/transformer.yaml b/examples/timit/asr1/conf/transformer.yaml index af05a6ce..f518cc5e 100644 --- a/examples/timit/asr1/conf/transformer.yaml +++ b/examples/timit/asr1/conf/transformer.yaml @@ -66,8 +66,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.5 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/asr0/conf/deepspeech2.yaml b/examples/tiny/asr0/conf/deepspeech2.yaml index ba453aad..7d841d47 100644 --- a/examples/tiny/asr0/conf/deepspeech2.yaml +++ b/examples/tiny/asr0/conf/deepspeech2.yaml @@ -42,7 +42,7 @@ model: use_gru: False share_rnn_weights: True blank_id: 0 - ctc_grad_norm_type: null + training: n_epoch: 5 diff --git a/examples/tiny/asr0/conf/deepspeech2_online.yaml b/examples/tiny/asr0/conf/deepspeech2_online.yaml index 36c774e3..393b6439 100644 --- 
a/examples/tiny/asr0/conf/deepspeech2_online.yaml +++ b/examples/tiny/asr0/conf/deepspeech2_online.yaml @@ -44,7 +44,7 @@ model: fc_layers_size_list: 512, 256 use_gru: True blank_id: 0 - ctc_grad_norm_type: null + training: n_epoch: 5 diff --git a/examples/tiny/asr1/conf/chunk_confermer.yaml b/examples/tiny/asr1/conf/chunk_confermer.yaml index 76b97adf..ad27478d 100644 --- a/examples/tiny/asr1/conf/chunk_confermer.yaml +++ b/examples/tiny/asr1/conf/chunk_confermer.yaml @@ -76,8 +76,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/asr1/conf/chunk_transformer.yaml b/examples/tiny/asr1/conf/chunk_transformer.yaml index 5f1991f9..298518fb 100644 --- a/examples/tiny/asr1/conf/chunk_transformer.yaml +++ b/examples/tiny/asr1/conf/chunk_transformer.yaml @@ -69,8 +69,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/asr1/conf/conformer.yaml b/examples/tiny/asr1/conf/conformer.yaml index b2937c1b..eb850902 100644 --- a/examples/tiny/asr1/conf/conformer.yaml +++ b/examples/tiny/asr1/conf/conformer.yaml @@ -72,8 +72,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/asr1/conf/transformer.yaml b/examples/tiny/asr1/conf/transformer.yaml index f5319756..c641d1f5 100644 --- a/examples/tiny/asr1/conf/transformer.yaml +++ b/examples/tiny/asr1/conf/transformer.yaml @@ -66,8 +66,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git 
a/examples/wenetspeech/asr1/conf/conformer.yaml b/examples/wenetspeech/asr1/conf/conformer.yaml index fc040a79..a438236d 100644 --- a/examples/wenetspeech/asr1/conf/conformer.yaml +++ b/examples/wenetspeech/asr1/conf/conformer.yaml @@ -33,8 +33,6 @@ model: # hybrid CTC/attention model_conf: ctc_weight: 0.3 - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py index 317abc69..f0a553ec 100644 --- a/paddlespeech/s2t/models/ds2/deepspeech2.py +++ b/paddlespeech/s2t/models/ds2/deepspeech2.py @@ -129,7 +129,7 @@ class DeepSpeech2Model(nn.Layer): rnn_layer_size=1024, #RNN layer size (number of RNN cells). use_gru=True, #Use gru if set True. Use simple rnn if set False. share_rnn_weights=True, #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported. 
- ctc_grad_norm_type='instance', )) + ctc_grad_norm_type=None,)) if config is not None: config.merge_from_other_cfg(default) return default @@ -143,7 +143,7 @@ class DeepSpeech2Model(nn.Layer): use_gru=False, share_rnn_weights=True, blank_id=0, - ctc_grad_norm_type='instance'): + ctc_grad_norm_type=None): super().__init__() self.encoder = CRNNEncoder( feat_size=feat_size, @@ -220,16 +220,14 @@ class DeepSpeech2Model(nn.Layer): """ model = cls( feat_size=dataloader.collate_fn.feature_size, - #feat_size=dataloader.dataset.feature_size, dict_size=dataloader.collate_fn.vocab_size, - #dict_size=dataloader.dataset.vocab_size, num_conv_layers=config.model.num_conv_layers, num_rnn_layers=config.model.num_rnn_layers, rnn_size=config.model.rnn_layer_size, use_gru=config.model.use_gru, share_rnn_weights=config.model.share_rnn_weights, blank_id=config.model.blank_id, - ctc_grad_norm_type=config.model.ctc_grad_norm_type, ) + ctc_grad_norm_type=config.model.get('ctc_grad_norm_type', None), ) infos = Checkpoint().load_parameters( model, checkpoint_path=checkpoint_path) logger.info(f"checkpoint info: {infos}") @@ -257,7 +255,7 @@ class DeepSpeech2Model(nn.Layer): use_gru=config.use_gru, share_rnn_weights=config.share_rnn_weights, blank_id=config.blank_id, - ctc_grad_norm_type=config.ctc_grad_norm_type, ) + ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), ) return model diff --git a/paddlespeech/s2t/models/ds2_online/deepspeech2.py b/paddlespeech/s2t/models/ds2_online/deepspeech2.py index d134239f..85876bce 100644 --- a/paddlespeech/s2t/models/ds2_online/deepspeech2.py +++ b/paddlespeech/s2t/models/ds2_online/deepspeech2.py @@ -255,7 +255,7 @@ class DeepSpeech2ModelOnline(nn.Layer): fc_layers_size_list=[512, 256], use_gru=True, #Use gru if set True. Use simple rnn if set False. 
blank_id=0, # index of blank in vocob.txt - ctc_grad_norm_type='instance', )) + ctc_grad_norm_type=None, )) if config is not None: config.merge_from_other_cfg(default) return default @@ -272,7 +272,7 @@ class DeepSpeech2ModelOnline(nn.Layer): fc_layers_size_list=[512, 256], use_gru=False, blank_id=0, - ctc_grad_norm_type='instance', ): + ctc_grad_norm_type=None, ): super().__init__() self.encoder = CRNNEncoder( feat_size=feat_size, @@ -361,7 +361,7 @@ class DeepSpeech2ModelOnline(nn.Layer): fc_layers_size_list=config.model.fc_layers_size_list, use_gru=config.model.use_gru, blank_id=config.model.blank_id, - ctc_grad_norm_type=config.model.ctc_grad_norm_type, ) + ctc_grad_norm_type=config.model.get('ctc_grad_norm_type', None), ) infos = Checkpoint().load_parameters( model, checkpoint_path=checkpoint_path) logger.info(f"checkpoint info: {infos}") @@ -391,7 +391,7 @@ class DeepSpeech2ModelOnline(nn.Layer): fc_layers_size_list=config.fc_layers_size_list, use_gru=config.use_gru, blank_id=config.blank_id, - ctc_grad_norm_type=config.ctc_grad_norm_type, ) + ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), ) return model diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 4f833372..8053ed3a 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -894,14 +894,16 @@ class U2Model(U2DecodeModel): # ctc decoder and ctc loss model_conf = configs['model_conf'] + dropout_rate = model_conf.get('ctc_dropout_rate', 0.0) + grad_norm_type = model_conf.get('ctc_grad_norm_type', None) ctc = CTCDecoder( odim=vocab_size, enc_n_units=encoder.output_size(), blank_id=0, - dropout_rate=model_conf['ctc_dropoutrate'], + dropout_rate=dropout_rate, reduction=True, # sum batch_average=True, # sum / batch_size - grad_norm_type=model_conf['ctc_grad_norm_type']) + grad_norm_type=grad_norm_type) return vocab_size, encoder, decoder, ctc diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index 
a83e6707..3a23804f 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -655,14 +655,16 @@ class U2STModel(U2STBaseModel): **configs['decoder_conf']) # ctc decoder and ctc loss model_conf = configs['model_conf'] + dropout_rate = model_conf.get('ctc_dropout_rate', 0.0) + grad_norm_type = model_conf.get('ctc_grad_norm_type', None) ctc = CTCDecoder( odim=vocab_size, enc_n_units=encoder.output_size(), blank_id=0, - dropout_rate=model_conf['ctc_dropoutrate'], + dropout_rate=dropout_rate, reduction=True, # sum batch_average=True, # sum / batch_size - grad_norm_type=model_conf['ctc_grad_norm_type']) + grad_norm_type=grad_norm_type) return vocab_size, encoder, (st_decoder, decoder, ctc) else: diff --git a/tests/unit/asr/u2_model_test.py b/tests/unit/asr/u2_model_test.py index f46c6d40..5b11d2ad 100644 --- a/tests/unit/asr/u2_model_test.py +++ b/tests/unit/asr/u2_model_test.py @@ -74,8 +74,6 @@ class TestU2Model(unittest.TestCase): model_conf: ctc_weight: 0.3 lsm_weight: 0.1 # label smoothing option - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null length_normalized_loss: false """ cfg = CN().load_cfg(conf_str) @@ -128,8 +126,6 @@ class TestU2Model(unittest.TestCase): model_conf: ctc_weight: 0.3 lsm_weight: 0.1 # label smoothing option - ctc_dropoutrate: 0.0 - ctc_grad_norm_type: null length_normalized_loss: false """ cfg = CN().load_cfg(conf_str)