diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py
index a01766da..7061d173 100644
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -661,9 +661,7 @@ class U2BaseModel(nn.Layer):
             xs, offset, required_cache_size, subsampling_cache,
             elayers_output_cache, conformer_cnn_cache)

-    # @jit.to_static([
-    #     paddle.static.InputSpec(shape=[1, None, feat_dim],dtype='float32'),  # audio feat, [B,T,D]
-    # ])
+    # @jit.to_static
     def ctc_activation(self, xs: paddle.Tensor) -> paddle.Tensor:
         """ Export interface for c++ call, apply linear transform and log
             softmax before ctc
@@ -830,6 +828,7 @@ class U2Model(U2BaseModel):
         Returns:
             int, nn.Layer, nn.Layer, nn.Layer: vocab size, encoder, decoder, ctc
         """
+        # cmvn
         if configs['cmvn_file'] is not None:
             mean, istd = load_cmvn(configs['cmvn_file'],
                                    configs['cmvn_file_type'])
@@ -839,11 +838,13 @@
         else:
             global_cmvn = None

+        # input & output dim
         input_dim = configs['input_dim']
         vocab_size = configs['output_dim']
         assert input_dim != 0, input_dim
         assert vocab_size != 0, vocab_size

+        # encoder
         encoder_type = configs.get('encoder', 'transformer')
         logger.info(f"U2 Encoder type: {encoder_type}")
         if encoder_type == 'transformer':
@@ -855,17 +856,21 @@
         else:
             raise ValueError(f"not support encoder type:{encoder_type}")

+        # decoder
         decoder = TransformerDecoder(vocab_size,
                                      encoder.output_size(),
                                      **configs['decoder_conf'])
+
+        # ctc decoder and ctc loss
+        model_conf = configs['model_conf']
         ctc = CTCDecoder(
             odim=vocab_size,
             enc_n_units=encoder.output_size(),
             blank_id=0,
-            dropout_rate=0.0,
+            dropout_rate=model_conf['ctc_dropoutrate'],
             reduction=True,  # sum
             batch_average=True,  # sum / batch_size
-            grad_norm_type='instance')
+            grad_norm_type=model_conf['ctc_grad_norm_type'])

         return vocab_size, encoder, decoder, ctc

diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py
index 7dae3745..6737a549 100644
--- a/deepspeech/models/u2_st.py
+++ b/deepspeech/models/u2_st.py
@@ -413,26 +413,26 @@
         best_hyps = best_hyps[:, 1:]
         return best_hyps

-    @jit.to_static
+    # @jit.to_static
     def subsampling_rate(self) -> int:
         """ Export interface for c++ call, return subsampling_rate of the
             model
         """
         return self.encoder.embed.subsampling_rate

-    @jit.to_static
+    # @jit.to_static
     def right_context(self) -> int:
         """ Export interface for c++ call, return right_context of the model
         """
         return self.encoder.embed.right_context

-    @jit.to_static
+    # @jit.to_static
     def sos_symbol(self) -> int:
         """ Export interface for c++ call, return sos symbol id of the model
         """
         return self.sos

-    @jit.to_static
+    # @jit.to_static
     def eos_symbol(self) -> int:
         """ Export interface for c++ call, return eos symbol id of the model
         """
@@ -468,7 +468,7 @@
             xs, offset, required_cache_size, subsampling_cache,
             elayers_output_cache, conformer_cnn_cache)

-    @jit.to_static
+    # @jit.to_static
     def ctc_activation(self, xs: paddle.Tensor) -> paddle.Tensor:
         """ Export interface for c++ call, apply linear transform and log
             softmax before ctc
@@ -643,14 +643,16 @@
             decoder = TransformerDecoder(vocab_size,
                                          encoder.output_size(),
                                          **configs['decoder_conf'])
+            # ctc decoder and ctc loss
+            model_conf = configs['model_conf']
             ctc = CTCDecoder(
                 odim=vocab_size,
                 enc_n_units=encoder.output_size(),
                 blank_id=0,
-                dropout_rate=0.0,
+                dropout_rate=model_conf['ctc_dropoutrate'],
                 reduction=True,  # sum
                 batch_average=True,  # sum / batch_size
-                grad_norm_type='instance')
+                grad_norm_type=model_conf['ctc_grad_norm_type'])

             return vocab_size, encoder, (st_decoder, decoder, ctc)
         else:
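The two model changes above stop hard-coding `dropout_rate` and `grad_norm_type` in `CTCDecoder` and read them from `model_conf` instead, so every config must now declare `ctc_dropoutrate` and `ctc_grad_norm_type` (the yaml changes below add them). A minimal sketch of the resulting config flow, assuming a config laid out like the example yamls and using PyYAML directly rather than the project's own config loader:

```python
import yaml

# Parse one of the example configs (path assumes the repo root).
with open("examples/aishell/s1/conf/conformer.yaml") as f:
    model_section = yaml.safe_load(f)["model"]

model_conf = model_section["model_conf"]

# The diff indexes both keys directly, so a config that omits them
# now fails with a KeyError when the model is built.
dropout_rate = model_conf["ctc_dropoutrate"]        # e.g. 0.0
grad_norm_type = model_conf["ctc_grad_norm_type"]   # e.g. "instance"
```

A more forgiving variant would be `model_conf.get('ctc_dropoutrate', 0.0)` and `model_conf.get('ctc_grad_norm_type', 'instance')`, which keeps the previously hard-coded values as defaults for configs that have not been updated.
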
diff --git a/examples/aishell/s1/conf/chunk_conformer.yaml b/examples/aishell/s1/conf/chunk_conformer.yaml
index 3e606788..6f8ae135 100644
--- a/examples/aishell/s1/conf/chunk_conformer.yaml
+++ b/examples/aishell/s1/conf/chunk_conformer.yaml
@@ -76,6 +76,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml
index 4b1430c5..a4248459 100644
--- a/examples/aishell/s1/conf/conformer.yaml
+++ b/examples/aishell/s1/conf/conformer.yaml
@@ -71,6 +71,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s1/conf/chunk_conformer.yaml b/examples/librispeech/s1/conf/chunk_conformer.yaml
index 0de1aefe..92db20f6 100644
--- a/examples/librispeech/s1/conf/chunk_conformer.yaml
+++ b/examples/librispeech/s1/conf/chunk_conformer.yaml
@@ -76,6 +76,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s1/conf/chunk_transformer.yaml b/examples/librispeech/s1/conf/chunk_transformer.yaml
index f782a037..e0bc3135 100644
--- a/examples/librispeech/s1/conf/chunk_transformer.yaml
+++ b/examples/librispeech/s1/conf/chunk_transformer.yaml
@@ -69,6 +69,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s1/conf/conformer.yaml b/examples/librispeech/s1/conf/conformer.yaml
index 6d825f05..78be249c 100644
--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@@ -72,6 +72,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s2/conf/chunk_conformer.yaml b/examples/librispeech/s2/conf/chunk_conformer.yaml
index 0de1aefe..92db20f6 100644
--- a/examples/librispeech/s2/conf/chunk_conformer.yaml
+++ b/examples/librispeech/s2/conf/chunk_conformer.yaml
@@ -76,6 +76,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s2/conf/chunk_transformer.yaml b/examples/librispeech/s2/conf/chunk_transformer.yaml
index f782a037..e0bc3135 100644
--- a/examples/librispeech/s2/conf/chunk_transformer.yaml
+++ b/examples/librispeech/s2/conf/chunk_transformer.yaml
@@ -69,6 +69,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s2/conf/conformer.yaml b/examples/librispeech/s2/conf/conformer.yaml
index 955b6108..9a727413 100644
--- a/examples/librispeech/s2/conf/conformer.yaml
+++ b/examples/librispeech/s2/conf/conformer.yaml
@@ -72,6 +72,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/librispeech/s2/conf/transformer.yaml b/examples/librispeech/s2/conf/transformer.yaml
index 4c60913e..edf5b81d 100644
--- a/examples/librispeech/s2/conf/transformer.yaml
+++ b/examples/librispeech/s2/conf/transformer.yaml
@@ -58,6 +58,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/ted_en_zh/t0/conf/transformer.yaml b/examples/ted_en_zh/t0/conf/transformer.yaml
index 755e0446..1aad86d2 100644
--- a/examples/ted_en_zh/t0/conf/transformer.yaml
+++ b/examples/ted_en_zh/t0/conf/transformer.yaml
@@ -68,6 +68,8 @@ model:
   model_conf:
     asr_weight: 0.0
     ctc_weight: 0.0
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml b/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml
index bc1f8890..0144c40d 100644
--- a/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml
+++ b/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml
@@ -68,6 +68,8 @@ model:
   model_conf:
     asr_weight: 0.5
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/timit/s1/conf/transformer.yaml b/examples/timit/s1/conf/transformer.yaml
index eb191d0b..c3b51996 100644
--- a/examples/timit/s1/conf/transformer.yaml
+++ b/examples/timit/s1/conf/transformer.yaml
@@ -66,6 +66,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/tiny/s1/conf/chunk_confermer.yaml b/examples/tiny/s1/conf/chunk_confermer.yaml
index 96da3d9f..be2e82f9 100644
--- a/examples/tiny/s1/conf/chunk_confermer.yaml
+++ b/examples/tiny/s1/conf/chunk_confermer.yaml
@@ -76,6 +76,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/tiny/s1/conf/chunk_transformer.yaml b/examples/tiny/s1/conf/chunk_transformer.yaml
index 1adb91c4..93439a85 100644
--- a/examples/tiny/s1/conf/chunk_transformer.yaml
+++ b/examples/tiny/s1/conf/chunk_transformer.yaml
@@ -69,6 +69,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/tiny/s1/conf/conformer.yaml b/examples/tiny/s1/conf/conformer.yaml
index b40e77e3..9bb67c44 100644
--- a/examples/tiny/s1/conf/conformer.yaml
+++ b/examples/tiny/s1/conf/conformer.yaml
@@ -72,6 +72,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false

diff --git a/examples/tiny/s1/conf/transformer.yaml b/examples/tiny/s1/conf/transformer.yaml
index fd5adbde..5127e8c6 100644
--- a/examples/tiny/s1/conf/transformer.yaml
+++ b/examples/tiny/s1/conf/transformer.yaml
@@ -66,6 +66,8 @@ model:
   # hybrid CTC/attention
   model_conf:
     ctc_weight: 0.3
+    ctc_dropoutrate: 0.0
+    ctc_grad_norm_type: instance
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false
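
Since the same two keys are added to sixteen example configs, a quick scan such as the following (illustrative only; assumes PyYAML and that it is run from the repository root) can confirm that no u2-style config was missed:

```python
from pathlib import Path

import yaml

# Report any example config that defines model.model_conf but is
# missing one of the new CTC options.
for conf in sorted(Path("examples").rglob("conf/*.yaml")):
    data = yaml.safe_load(conf.read_text())
    if not isinstance(data, dict):
        continue
    model_conf = data.get("model", {}).get("model_conf")
    if model_conf is None:
        continue  # not a u2-style config
    missing = {"ctc_dropoutrate", "ctc_grad_norm_type"} - model_conf.keys()
    if missing:
        print(f"{conf}: missing {sorted(missing)}")
```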