diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py index da3b1acb..ed209f3d 100644 --- a/deepspeech/__init__.py +++ b/deepspeech/__init__.py @@ -356,35 +356,7 @@ if not hasattr(paddle.Tensor, 'tolist'): setattr(paddle.Tensor, 'tolist', tolist) -########### hcak paddle.nn.functional ############# -# hack loss -def ctc_loss(logits, - labels, - input_lengths, - label_lengths, - blank=0, - reduction='mean', - norm_by_times=True): - #logger.info("my ctc loss with norm by times") - ## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403 - loss_out = paddle.fluid.layers.warpctc(logits, labels, blank, norm_by_times, - input_lengths, label_lengths) - - loss_out = paddle.fluid.layers.squeeze(loss_out, [-1]) - assert reduction in ['mean', 'sum', 'none'] - if reduction == 'mean': - loss_out = paddle.mean(loss_out / label_lengths) - elif reduction == 'sum': - loss_out = paddle.sum(loss_out) - return loss_out - - -logger.debug( - "override ctc_loss of paddle.nn.functional if exists, remove this when fixed!" -) -F.ctc_loss = ctc_loss - -########### hcak paddle.nn ############# +########### hack paddle.nn ############# from paddle.nn import Layer from typing import Optional from typing import Mapping @@ -534,3 +506,5 @@ if not hasattr(paddle.nn, 'LayerDict'): logger.debug( "register user LayerDict to paddle.nn, remove this when fixed!") setattr(paddle.nn, 'LayerDict', LayerDict) + + diff --git a/deepspeech/modules/ctc.py b/deepspeech/modules/ctc.py index 565a11e1..e0c8006d 100644 --- a/deepspeech/modules/ctc.py +++ b/deepspeech/modules/ctc.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle from paddle import nn +from typing import Union from paddle.nn import functional as F from typeguard import check_argument_types @@ -40,7 +41,7 @@ class CTCDecoderBase(nn.Layer): dropout_rate: float=0.0, reduction: bool=True, batch_average: bool=True, - grad_norm_type: str="instance"): + grad_norm_type: Union[str, None]=None): """CTC decoder Args: @@ -49,7 +50,7 @@ class CTCDecoderBase(nn.Layer): dropout_rate (float): dropout rate (0.0 ~ 1.0) reduction (bool): reduce the CTC loss into a scalar, True for 'sum' or 'none' batch_average (bool): do batch dim wise average. - grad_norm_type (str): one of 'instance', 'batch', 'frame', None. + grad_norm_type (str): Default, None. one of 'instance', 'batch', 'frame', None. """ assert check_argument_types() super().__init__() diff --git a/deepspeech/modules/loss.py b/deepspeech/modules/loss.py index e06f26f8..e1138810 100644 --- a/deepspeech/modules/loss.py +++ b/deepspeech/modules/loss.py @@ -54,7 +54,7 @@ class CTCLoss(nn.Layer): self.norm_by_total_logits_len = True else: raise ValueError(f"CTCLoss Grad Norm no support {grad_norm_type}") - self.kwargs = { + kwargs = { "norm_by_times": self.norm_by_times, "norm_by_batchsize": self.norm_by_batchsize, "norm_by_total_logits_len": self.norm_by_total_logits_len, @@ -66,10 +66,9 @@ class CTCLoss(nn.Layer): except ValueError: # Some function, e.g. built-in function, are failed param = {} - self._kwargs = {k: v for k, v in self.kwargs.items() if k in param} - _notin = {k: v for k, v in self.kwargs.items() if k not in param} + self._kwargs = {k: v for k, v in kwargs.items() if k in param} + _notin = {k: v for k, v in kwargs.items() if k not in param} logger.info(f"{self.loss} kwargs:{self._kwargs}, not support: {_notin}") - #self.loss_fn = partial(self.loss.forward, **_kwargs) def forward(self, logits, ys_pad, hlens, ys_lens): """Compute CTC loss. @@ -89,8 +88,7 @@ class CTCLoss(nn.Layer): # logits: (B, L, D) -> (L, B, D) logits = logits.transpose([1, 0, 2]) ys_pad = ys_pad.astype(paddle.int32) - #loss = self.loss_fn(logits, ys_pad, hlens, ys_lens) - loss = self.loss(logits, ys_pad, hlens, ys_lens) + loss = self.loss(logits, ys_pad, hlens, ys_lens, **self._kwargs) if self.batch_average: # Batch-size average loss = loss / B diff --git a/examples/aishell/s0/conf/deepspeech2.yaml b/examples/aishell/s0/conf/deepspeech2.yaml index ffefaeb3..7a198991 100644 --- a/examples/aishell/s0/conf/deepspeech2.yaml +++ b/examples/aishell/s0/conf/deepspeech2.yaml @@ -41,7 +41,7 @@ model: use_gru: True share_rnn_weights: False blank_id: 0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null training: n_epoch: 80 diff --git a/examples/aishell/s0/conf/deepspeech2_online.yaml b/examples/aishell/s0/conf/deepspeech2_online.yaml index cac599dc..c15e71a3 100644 --- a/examples/aishell/s0/conf/deepspeech2_online.yaml +++ b/examples/aishell/s0/conf/deepspeech2_online.yaml @@ -43,7 +43,7 @@ model: fc_layers_size_list: -1, use_gru: False blank_id: 0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null training: n_epoch: 50 diff --git a/examples/aishell/s1/conf/chunk_conformer.yaml b/examples/aishell/s1/conf/chunk_conformer.yaml index 9b563da2..8682538b 100644 --- a/examples/aishell/s1/conf/chunk_conformer.yaml +++ b/examples/aishell/s1/conf/chunk_conformer.yaml @@ -77,7 +77,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml index dfa9a4b0..71cd044e 100644 --- a/examples/aishell/s1/conf/conformer.yaml +++ b/examples/aishell/s1/conf/conformer.yaml @@ -72,7 +72,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s0/conf/deepspeech2.yaml b/examples/librispeech/s0/conf/deepspeech2.yaml index 47ef9421..8afaabf4 100644 --- a/examples/librispeech/s0/conf/deepspeech2.yaml +++ b/examples/librispeech/s0/conf/deepspeech2.yaml @@ -41,7 +41,7 @@ model: use_gru: False share_rnn_weights: True blank_id: 0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null training: n_epoch: 50 diff --git a/examples/librispeech/s0/conf/deepspeech2_online.yaml b/examples/librispeech/s0/conf/deepspeech2_online.yaml index e2f91094..d6ab9523 100644 --- a/examples/librispeech/s0/conf/deepspeech2_online.yaml +++ b/examples/librispeech/s0/conf/deepspeech2_online.yaml @@ -43,7 +43,7 @@ model: fc_layers_size_list: 512, 256 use_gru: False blank_id: 0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null training: n_epoch: 50 diff --git a/examples/librispeech/s1/conf/chunk_conformer.yaml b/examples/librispeech/s1/conf/chunk_conformer.yaml index 9936450b..4d0e6ceb 100644 --- a/examples/librispeech/s1/conf/chunk_conformer.yaml +++ b/examples/librispeech/s1/conf/chunk_conformer.yaml @@ -77,7 +77,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s1/conf/chunk_transformer.yaml b/examples/librispeech/s1/conf/chunk_transformer.yaml index 44f3e5a7..c7b53f95 100644 --- a/examples/librispeech/s1/conf/chunk_transformer.yaml +++ b/examples/librispeech/s1/conf/chunk_transformer.yaml @@ -70,7 +70,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s1/conf/conformer.yaml b/examples/librispeech/s1/conf/conformer.yaml index a05e37dd..3bc942dc 100644 --- a/examples/librispeech/s1/conf/conformer.yaml +++ b/examples/librispeech/s1/conf/conformer.yaml @@ -73,7 +73,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s1/conf/transformer.yaml b/examples/librispeech/s1/conf/transformer.yaml index c9dc1413..3cc17004 100644 --- a/examples/librispeech/s1/conf/transformer.yaml +++ b/examples/librispeech/s1/conf/transformer.yaml @@ -68,7 +68,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s2/conf/chunk_conformer.yaml b/examples/librispeech/s2/conf/chunk_conformer.yaml index 872b560b..afd2b051 100644 --- a/examples/librispeech/s2/conf/chunk_conformer.yaml +++ b/examples/librispeech/s2/conf/chunk_conformer.yaml @@ -77,7 +77,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s2/conf/chunk_transformer.yaml b/examples/librispeech/s2/conf/chunk_transformer.yaml index 132a4f9d..721bb7d9 100644 --- a/examples/librispeech/s2/conf/chunk_transformer.yaml +++ b/examples/librispeech/s2/conf/chunk_transformer.yaml @@ -70,7 +70,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/librispeech/s2/conf/conformer.yaml b/examples/librispeech/s2/conf/conformer.yaml index bc87466e..ef87753c 100644 --- a/examples/librispeech/s2/conf/conformer.yaml +++ b/examples/librispeech/s2/conf/conformer.yaml @@ -73,7 +73,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/ted_en_zh/t0/conf/transformer.yaml b/examples/ted_en_zh/t0/conf/transformer.yaml index 8c03e328..8a7e10f0 100644 --- a/examples/ted_en_zh/t0/conf/transformer.yaml +++ b/examples/ted_en_zh/t0/conf/transformer.yaml @@ -69,7 +69,7 @@ model: asr_weight: 0.0 ctc_weight: 0.0 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml b/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml index cbfae93e..f8dc383f 100644 --- a/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml +++ b/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml @@ -69,7 +69,7 @@ model: asr_weight: 0.5 ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/timit/s1/conf/transformer.yaml b/examples/timit/s1/conf/transformer.yaml index e138fbbe..d3ced898 100644 --- a/examples/timit/s1/conf/transformer.yaml +++ b/examples/timit/s1/conf/transformer.yaml @@ -67,7 +67,7 @@ model: model_conf: ctc_weight: 0.5 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/s0/conf/deepspeech2.yaml b/examples/tiny/s0/conf/deepspeech2.yaml index a7940cb2..621b372c 100644 --- a/examples/tiny/s0/conf/deepspeech2.yaml +++ b/examples/tiny/s0/conf/deepspeech2.yaml @@ -42,7 +42,7 @@ model: use_gru: False share_rnn_weights: True blank_id: 0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null training: n_epoch: 10 diff --git a/examples/tiny/s0/conf/deepspeech2_online.yaml b/examples/tiny/s0/conf/deepspeech2_online.yaml index 7e30409f..5a8294ad 100644 --- a/examples/tiny/s0/conf/deepspeech2_online.yaml +++ b/examples/tiny/s0/conf/deepspeech2_online.yaml @@ -44,7 +44,7 @@ model: fc_layers_size_list: 512, 256 use_gru: True blank_id: 0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null training: n_epoch: 10 diff --git a/examples/tiny/s1/conf/chunk_confermer.yaml b/examples/tiny/s1/conf/chunk_confermer.yaml index f3c7e1dd..b14b4b21 100644 --- a/examples/tiny/s1/conf/chunk_confermer.yaml +++ b/examples/tiny/s1/conf/chunk_confermer.yaml @@ -77,7 +77,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/s1/conf/chunk_transformer.yaml b/examples/tiny/s1/conf/chunk_transformer.yaml index 83005754..38edbf35 100644 --- a/examples/tiny/s1/conf/chunk_transformer.yaml +++ b/examples/tiny/s1/conf/chunk_transformer.yaml @@ -70,7 +70,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/s1/conf/conformer.yaml b/examples/tiny/s1/conf/conformer.yaml index 628e3b77..0b06b2b7 100644 --- a/examples/tiny/s1/conf/conformer.yaml +++ b/examples/tiny/s1/conf/conformer.yaml @@ -73,7 +73,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false diff --git a/examples/tiny/s1/conf/transformer.yaml b/examples/tiny/s1/conf/transformer.yaml index 27ffcae4..1c6f9e02 100644 --- a/examples/tiny/s1/conf/transformer.yaml +++ b/examples/tiny/s1/conf/transformer.yaml @@ -67,7 +67,7 @@ model: model_conf: ctc_weight: 0.3 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: null lsm_weight: 0.1 # label smoothing option length_normalized_loss: false