diff --git a/examples/aishell/asr0/conf/deepspeech2_online.yaml b/examples/aishell/asr0/conf/deepspeech2_online.yaml
index 010d8f155..2f63f4de0 100644
--- a/examples/aishell/asr0/conf/deepspeech2_online.yaml
+++ b/examples/aishell/asr0/conf/deepspeech2_online.yaml
@@ -43,7 +43,7 @@ model:
   fc_layers_size_list: -1,
   use_gru: False
   blank_id: 0
-  ctc_grad_norm_type: null
+

 training:
   n_epoch: 65
diff --git a/examples/librispeech/asr0/conf/deepspeech2.yaml b/examples/librispeech/asr0/conf/deepspeech2.yaml
index 70fa3fcb2..f3574e150 100644
--- a/examples/librispeech/asr0/conf/deepspeech2.yaml
+++ b/examples/librispeech/asr0/conf/deepspeech2.yaml
@@ -41,7 +41,7 @@ model:
   use_gru: False
   share_rnn_weights: True
   blank_id: 0
-  ctc_grad_norm_type: null
+

 training:
   n_epoch: 50
diff --git a/examples/librispeech/asr0/conf/deepspeech2_online.yaml b/examples/librispeech/asr0/conf/deepspeech2_online.yaml
index 3e07862d6..0d16bc571 100644
--- a/examples/librispeech/asr0/conf/deepspeech2_online.yaml
+++ b/examples/librispeech/asr0/conf/deepspeech2_online.yaml
@@ -43,7 +43,7 @@ model:
   fc_layers_size_list: 512, 256
   use_gru: False
   blank_id: 0
-  ctc_grad_norm_type: null
+

 training:
   n_epoch: 50
diff --git a/examples/librispeech/asr1/conf/chunk_conformer.yaml b/examples/librispeech/asr1/conf/chunk_conformer.yaml
index 545806640..7f5930378 100644
--- a/examples/librispeech/asr1/conf/chunk_conformer.yaml
+++ b/examples/librispeech/asr1/conf/chunk_conformer.yaml
@@ -76,8 +76,6 @@ model:
     # hybrid CTC/attention
     model_conf:
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/librispeech/asr1/conf/chunk_transformer.yaml b/examples/librispeech/asr1/conf/chunk_transformer.yaml
index 70a9dc6af..366d6de0f 100644
--- a/examples/librispeech/asr1/conf/chunk_transformer.yaml
+++ b/examples/librispeech/asr1/conf/chunk_transformer.yaml
@@ -69,8 +69,6 @@ model:
     # hybrid CTC/attention
     model_conf:
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/librispeech/asr1/conf/conformer.yaml b/examples/librispeech/asr1/conf/conformer.yaml
index ca934eb1d..f02f24dc6 100644
--- a/examples/librispeech/asr1/conf/conformer.yaml
+++ b/examples/librispeech/asr1/conf/conformer.yaml
@@ -72,8 +72,6 @@ model:
     # hybrid CTC/attention
     model_conf:
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/librispeech/asr1/conf/transformer.yaml b/examples/librispeech/asr1/conf/transformer.yaml
index 0cc0dae63..a90efe482 100644
--- a/examples/librispeech/asr1/conf/transformer.yaml
+++ b/examples/librispeech/asr1/conf/transformer.yaml
@@ -29,8 +29,6 @@ model:
     # hybrid CTC/attention
     model_conf:
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

@@ -81,7 +79,7 @@ training:
   optim_conf:
     lr: 0.004
     weight_decay: 1e-06
-  scheduler: warmuplr     # pytorch v1.1.0+ required
+  scheduler: warmuplr
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
diff --git a/examples/librispeech/asr2/conf/transformer.yaml b/examples/librispeech/asr2/conf/transformer.yaml
index 00240743e..a16563a59 100644
--- a/examples/librispeech/asr2/conf/transformer.yaml
+++ b/examples/librispeech/asr2/conf/transformer.yaml
@@ -30,8 +30,6 @@ model:
     # hybrid CTC/attention
     model_conf:
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/ted_en_zh/st0/conf/transformer.yaml b/examples/ted_en_zh/st0/conf/transformer.yaml
index 6ed75be4e..36f287b10 100644
--- a/examples/ted_en_zh/st0/conf/transformer.yaml
+++ b/examples/ted_en_zh/st0/conf/transformer.yaml
@@ -68,8 +68,6 @@ model:
     model_conf:
         asr_weight: 0.0
         ctc_weight: 0.0
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml b/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
index 7e886cca3..78887d3cd 100644
--- a/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
+++ b/examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
@@ -68,8 +68,6 @@ model:
     model_conf:
         asr_weight: 0.5
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/ted_en_zh/st1/conf/transformer.yaml b/examples/ted_en_zh/st1/conf/transformer.yaml
index 3bef7bc5f..609c58240 100644
--- a/examples/ted_en_zh/st1/conf/transformer.yaml
+++ b/examples/ted_en_zh/st1/conf/transformer.yaml
@@ -68,8 +68,6 @@ model:
     model_conf:
         asr_weight: 0.0
         ctc_weight: 0.0
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml b/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
index 3175aad9f..10eccd1eb 100644
--- a/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
+++ b/examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
@@ -68,8 +68,6 @@ model:
     model_conf:
         asr_weight: 0.5
         ctc_weight: 0.3
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/timit/asr1/conf/transformer.yaml b/examples/timit/asr1/conf/transformer.yaml
index af05a6cea..f518cc5e7 100644
--- a/examples/timit/asr1/conf/transformer.yaml
+++ b/examples/timit/asr1/conf/transformer.yaml
@@ -66,8 +66,6 @@ model:
     # hybrid CTC/attention
     model_conf:
         ctc_weight: 0.5
-        ctc_dropoutrate: 0.0
-        ctc_grad_norm_type: null
         lsm_weight: 0.1     # label smoothing option
         length_normalized_loss: false

diff --git a/examples/tiny/asr0/conf/deepspeech2.yaml b/examples/tiny/asr0/conf/deepspeech2.yaml
index ba453aad7..7d841d474 100644
--- a/examples/tiny/asr0/conf/deepspeech2.yaml
+++ b/examples/tiny/asr0/conf/deepspeech2.yaml
@@ -42,7 +42,7 @@ model:
   use_gru: False
   share_rnn_weights: True
   blank_id: 0
-  ctc_grad_norm_type: null
+

 training:
   n_epoch: 5
diff --git a/examples/tiny/asr0/conf/deepspeech2_online.yaml b/examples/tiny/asr0/conf/deepspeech2_online.yaml
index 36c774e37..393b6439f 100644
--- a/examples/tiny/asr0/conf/deepspeech2_online.yaml
+++ b/examples/tiny/asr0/conf/deepspeech2_online.yaml
@@ -44,7 +44,7 @@ model:
   fc_layers_size_list: 512, 256
   use_gru: True
   blank_id: 0
-  ctc_grad_norm_type: null
+

 training:
   n_epoch: 5
diff --git a/examples/tiny/asr1/conf/chunk_confermer.yaml b/examples/tiny/asr1/conf/chunk_confermer.yaml
index 76b97adf8..ad27478de 100644
--- a/examples/tiny/asr1/conf/chunk_confermer.yaml
+++ b/examples/tiny/asr1/conf/chunk_confermer.yaml
@@ -76,8 +76,6 @@ model:
     # hybrid CTC/attention
     model_conf:
        ctc_weight: 0.3
-       ctc_dropoutrate: 0.0
-       ctc_grad_norm_type: null
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false
diff --git a/examples/tiny/asr1/conf/chunk_transformer.yaml b/examples/tiny/asr1/conf/chunk_transformer.yaml
index 5f1991f95..298518fb5 100644
--- a/examples/tiny/asr1/conf/chunk_transformer.yaml
+++ b/examples/tiny/asr1/conf/chunk_transformer.yaml
@@ -69,8 +69,6 @@ model:
     # hybrid CTC/attention
     model_conf:
        ctc_weight: 0.3
-       ctc_dropoutrate: 0.0
-       ctc_grad_norm_type: null
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false

diff --git a/examples/tiny/asr1/conf/conformer.yaml b/examples/tiny/asr1/conf/conformer.yaml
index b2937c1bd..eb8509024 100644
--- a/examples/tiny/asr1/conf/conformer.yaml
+++ b/examples/tiny/asr1/conf/conformer.yaml
@@ -72,8 +72,6 @@ model:
     # hybrid CTC/attention
     model_conf:
        ctc_weight: 0.3
-       ctc_dropoutrate: 0.0
-       ctc_grad_norm_type: null
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false

diff --git a/examples/tiny/asr1/conf/transformer.yaml b/examples/tiny/asr1/conf/transformer.yaml
index f53197561..c641d1f5b 100644
--- a/examples/tiny/asr1/conf/transformer.yaml
+++ b/examples/tiny/asr1/conf/transformer.yaml
@@ -66,8 +66,6 @@ model:
     # hybrid CTC/attention
     model_conf:
        ctc_weight: 0.3
-       ctc_dropoutrate: 0.0
-       ctc_grad_norm_type: null
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false

diff --git a/examples/wenetspeech/asr1/conf/conformer.yaml b/examples/wenetspeech/asr1/conf/conformer.yaml
index fc040a795..a438236d8 100644
--- a/examples/wenetspeech/asr1/conf/conformer.yaml
+++ b/examples/wenetspeech/asr1/conf/conformer.yaml
@@ -33,8 +33,6 @@ model:
     # hybrid CTC/attention
     model_conf:
        ctc_weight: 0.3
-       ctc_dropoutrate: 0.0
-       ctc_grad_norm_type: null
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false
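Note: the two keys removed from every model_conf above can now simply be left out of a config file, because the Python side (shown in the diffs below) reads them with dict-style .get() lookups that fall back to defaults: 0.0 for the dropout rate and None for the gradient-norm type. A minimal sketch of that fallback, using a plain dict in place of a parsed model_conf (the key names come from this patch; the surrounding values are illustrative):

    # model_conf as parsed from one of the trimmed YAML files above;
    # the two CTC keys are absent.
    model_conf = {
        "ctc_weight": 0.3,
        "lsm_weight": 0.1,
        "length_normalized_loss": False,
    }

    # dict.get returns the second argument when the key is missing.
    dropout_rate = model_conf.get("ctc_dropout_rate", 0.0)       # -> 0.0
    grad_norm_type = model_conf.get("ctc_grad_norm_type", None)  # -> None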
diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
index 317abc69e..f0a553ec8 100644
--- a/paddlespeech/s2t/models/ds2/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -129,7 +129,7 @@ class DeepSpeech2Model(nn.Layer):
                 rnn_layer_size=1024,  #RNN layer size (number of RNN cells).
                 use_gru=True,  #Use gru if set True. Use simple rnn if set False.
                 share_rnn_weights=True,  #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.
-                ctc_grad_norm_type='instance', ))
+                ctc_grad_norm_type=None, ))
         if config is not None:
             config.merge_from_other_cfg(default)
         return default
@@ -143,7 +143,7 @@ class DeepSpeech2Model(nn.Layer):
                  use_gru=False,
                  share_rnn_weights=True,
                  blank_id=0,
-                 ctc_grad_norm_type='instance'):
+                 ctc_grad_norm_type=None):
         super().__init__()
         self.encoder = CRNNEncoder(
             feat_size=feat_size,
@@ -220,16 +220,14 @@ class DeepSpeech2Model(nn.Layer):
         """
         model = cls(
             feat_size=dataloader.collate_fn.feature_size,
-            #feat_size=dataloader.dataset.feature_size,
             dict_size=dataloader.collate_fn.vocab_size,
-            #dict_size=dataloader.dataset.vocab_size,
             num_conv_layers=config.model.num_conv_layers,
             num_rnn_layers=config.model.num_rnn_layers,
             rnn_size=config.model.rnn_layer_size,
             use_gru=config.model.use_gru,
             share_rnn_weights=config.model.share_rnn_weights,
             blank_id=config.model.blank_id,
-            ctc_grad_norm_type=config.model.ctc_grad_norm_type, )
+            ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), )
         infos = Checkpoint().load_parameters(
             model, checkpoint_path=checkpoint_path)
         logger.info(f"checkpoint info: {infos}")
@@ -257,7 +255,7 @@ class DeepSpeech2Model(nn.Layer):
             use_gru=config.use_gru,
             share_rnn_weights=config.share_rnn_weights,
             blank_id=config.blank_id,
-            ctc_grad_norm_type=config.ctc_grad_norm_type, )
+            ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), )
         return model
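Note: the from_pretrained/from_config hunks above work because a yacs CfgNode is a dict subclass, so config.get('ctc_grad_norm_type', None) succeeds even when the key was never defined, whereas attribute access on a missing key raises AttributeError (presumably why the old config.model.ctc_grad_norm_type lookups failed on configs without the key). A small sketch of the difference, assuming the yacs package; the config contents are illustrative:

    from yacs.config import CfgNode

    config = CfgNode({"use_gru": False, "blank_id": 0})

    # Safe: CfgNode inherits dict.get, so a missing key yields the default.
    print(config.get("ctc_grad_norm_type", None))  # -> None

    # Unsafe: attribute access on a missing key raises AttributeError.
    try:
        config.ctc_grad_norm_type
    except AttributeError as err:
        print("raises:", err)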
diff --git a/paddlespeech/s2t/models/ds2_online/deepspeech2.py b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
index d134239f2..85876bce8 100644
--- a/paddlespeech/s2t/models/ds2_online/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
@@ -255,7 +255,7 @@ class DeepSpeech2ModelOnline(nn.Layer):
                 fc_layers_size_list=[512, 256],
                 use_gru=True,  #Use gru if set True. Use simple rnn if set False.
                 blank_id=0,  # index of blank in vocob.txt
-                ctc_grad_norm_type='instance', ))
+                ctc_grad_norm_type=None, ))
         if config is not None:
             config.merge_from_other_cfg(default)
         return default
@@ -272,7 +272,7 @@ class DeepSpeech2ModelOnline(nn.Layer):
                  fc_layers_size_list=[512, 256],
                  use_gru=False,
                  blank_id=0,
-                 ctc_grad_norm_type='instance', ):
+                 ctc_grad_norm_type=None, ):
         super().__init__()
         self.encoder = CRNNEncoder(
             feat_size=feat_size,
@@ -361,7 +361,7 @@ class DeepSpeech2ModelOnline(nn.Layer):
             fc_layers_size_list=config.model.fc_layers_size_list,
             use_gru=config.model.use_gru,
             blank_id=config.model.blank_id,
-            ctc_grad_norm_type=config.model.ctc_grad_norm_type, )
+            ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), )
         infos = Checkpoint().load_parameters(
             model, checkpoint_path=checkpoint_path)
         logger.info(f"checkpoint info: {infos}")
@@ -391,7 +391,7 @@ class DeepSpeech2ModelOnline(nn.Layer):
             fc_layers_size_list=config.fc_layers_size_list,
             use_gru=config.use_gru,
             blank_id=config.blank_id,
-            ctc_grad_norm_type=config.ctc_grad_norm_type, )
+            ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), )
         return model
diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index 4f833372a..8053ed3a8 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -894,14 +894,16 @@ class U2Model(U2DecodeModel):

         # ctc decoder and ctc loss
         model_conf = configs['model_conf']
+        dropout_rate = model_conf.get('ctc_dropout_rate', 0.0)
+        grad_norm_type = model_conf.get('ctc_grad_norm_type', None)
         ctc = CTCDecoder(
             odim=vocab_size,
             enc_n_units=encoder.output_size(),
             blank_id=0,
-            dropout_rate=model_conf['ctc_dropoutrate'],
+            dropout_rate=dropout_rate,
             reduction=True,  # sum
             batch_average=True,  # sum / batch_size
-            grad_norm_type=model_conf['ctc_grad_norm_type'])
+            grad_norm_type=grad_norm_type)

         return vocab_size, encoder, decoder, ctc
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index a83e67078..3a23804fe 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -655,14 +655,16 @@ class U2STModel(U2STBaseModel):
                 **configs['decoder_conf'])
             # ctc decoder and ctc loss
             model_conf = configs['model_conf']
+            dropout_rate = model_conf.get('ctc_dropout_rate', 0.0)
+            grad_norm_type = model_conf.get('ctc_grad_norm_type', None)
             ctc = CTCDecoder(
                 odim=vocab_size,
                 enc_n_units=encoder.output_size(),
                 blank_id=0,
-                dropout_rate=model_conf['ctc_dropoutrate'],
+                dropout_rate=dropout_rate,
                 reduction=True,  # sum
                 batch_average=True,  # sum / batch_size
-                grad_norm_type=model_conf['ctc_grad_norm_type'])
+                grad_norm_type=grad_norm_type)

             return vocab_size, encoder, (st_decoder, decoder, ctc)
         else:
diff --git a/tests/unit/asr/u2_model_test.py b/tests/unit/asr/u2_model_test.py
index f46c6d403..5b11d2ad3 100644
--- a/tests/unit/asr/u2_model_test.py
+++ b/tests/unit/asr/u2_model_test.py
@@ -74,8 +74,6 @@ class TestU2Model(unittest.TestCase):
             model_conf:
                 ctc_weight: 0.3
                 lsm_weight: 0.1     # label smoothing option
-                ctc_dropoutrate: 0.0
-                ctc_grad_norm_type: null
                 length_normalized_loss: false
         """
         cfg = CN().load_cfg(conf_str)
@@ -128,8 +126,6 @@ class TestU2Model(unittest.TestCase):
             model_conf:
                 ctc_weight: 0.3
                 lsm_weight: 0.1     # label smoothing option
-                ctc_dropoutrate: 0.0
-                ctc_grad_norm_type: null
                 length_normalized_loss: false
         """
         cfg = CN().load_cfg(conf_str)
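Note: the U2/U2ST hunks also change the spelling of the dropout key, from ctc_dropoutrate (the form deleted from the configs and tests above) to ctc_dropout_rate. Every config in this patch set the value to 0.0, which is also the new default, so behaviour is unchanged here; but a user config that still sets the old spelling is now silently ignored. A short sketch of that edge case (the old-style config below is hypothetical):

    # Hypothetical leftover config still using the old key spelling.
    old_style_conf = {"ctc_weight": 0.3, "ctc_dropoutrate": 0.2}

    # The new lookup uses the new spelling, so the old key has no effect
    # and the default wins.
    dropout_rate = old_style_conf.get("ctc_dropout_rate", 0.0)
    print(dropout_rate)  # -> 0.0, not 0.2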