diff --git a/paddlespeech/s2t/__init__.py b/paddlespeech/s2t/__init__.py
index 2365071f..2da68435 100644
--- a/paddlespeech/s2t/__init__.py
+++ b/paddlespeech/s2t/__init__.py
@@ -189,25 +189,6 @@ if not hasattr(paddle.Tensor, 'contiguous'):
     paddle.static.Variable.contiguous = contiguous
 
 
-def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
-    nargs = len(args)
-    assert (nargs <= 1)
-    s = paddle.shape(xs)
-    if nargs == 1:
-        return s[args[0]]
-    else:
-        return s
-
-
-#`to_static` do not process `size` property, maybe some `paddle` api dependent on it.
-logger.debug(
-    "override size of paddle.Tensor "
-    "(`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!"
-)
-paddle.Tensor.size = size
-paddle.static.Variable.size = size
-
-
 def view(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
     return xs.reshape(args)
 
@@ -219,7 +200,7 @@
 
 
 def view_as(xs: paddle.Tensor, ys: paddle.Tensor) -> paddle.Tensor:
-    return xs.reshape(ys.size())
+    return xs.reshape(paddle.shape(ys))
 
 
 if not hasattr(paddle.Tensor, 'view_as'):
diff --git a/paddlespeech/s2t/decoders/beam_search/beam_search.py b/paddlespeech/s2t/decoders/beam_search/beam_search.py
index f331cb1c..f6a2b4b0 100644
--- a/paddlespeech/s2t/decoders/beam_search/beam_search.py
+++ b/paddlespeech/s2t/decoders/beam_search/beam_search.py
@@ -194,7 +194,7 @@ class BeamSearch(paddle.nn.Layer):
 
         Args:
            hyp (Hypothesis): Hypothesis with prefix tokens to score
-            ids (paddle.Tensor): 1D tensor of new partial tokens to score, 
+            ids (paddle.Tensor): 1D tensor of new partial tokens to score,
                 len(ids) < n_vocab
            x (paddle.Tensor): Corresponding input feature, (T, D)
 
@@ -224,14 +224,14 @@ class BeamSearch(paddle.nn.Layer):
            ids (paddle.Tensor): The partial token ids(Global) to compute topk.
 
        Returns:
-            Tuple[paddle.Tensor, paddle.Tensor]: 
+            Tuple[paddle.Tensor, paddle.Tensor]:
                The topk full token ids and partial token ids.
                Their shapes are `(self.beam_size,)`.
                i.e. (global ids, global relative local ids).
 
        """
        # no pre beam performed, `ids` equal to `weighted_scores`
-        if weighted_scores.size(0) == ids.size(0):
+        if paddle.shape(weighted_scores)[0] == paddle.shape(ids)[0]:
            top_ids = weighted_scores.topk(
                self.beam_size)[1]  # index in n_vocab
            return top_ids, top_ids
@@ -370,13 +370,13 @@
        """
        # set length bounds
        if maxlenratio == 0:
-            maxlen = x.shape[0]
+            maxlen = paddle.shape(x)[0]
        elif maxlenratio < 0:
            maxlen = -1 * int(maxlenratio)
        else:
-            maxlen = max(1, int(maxlenratio * x.size(0)))
-        minlen = int(minlenratio * x.size(0))
-        logger.info("decoder input length: " + str(x.shape[0]))
+            maxlen = max(1, int(maxlenratio * paddle.shape(x)[0]))
+        minlen = int(minlenratio * paddle.shape(x)[0])
+        logger.info("decoder input length: " + str(paddle.shape(x)[0]))
        logger.info("max output length: " + str(maxlen))
        logger.info("min output length: " + str(minlen))
 
diff --git a/paddlespeech/s2t/decoders/scorers/ctc.py b/paddlespeech/s2t/decoders/scorers/ctc.py
index 81d8b078..3c1d4cf8 100644
--- a/paddlespeech/s2t/decoders/scorers/ctc.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc.py
@@ -69,7 +69,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
            return sc[i], st[i]
        else:  # for CTCPrefixScorePD (need new_id > 0)
            r, log_psi, f_min, f_max, scoring_idmap = state
-            s = log_psi[i, new_id].expand(log_psi.size(1))
+            s = log_psi[i, new_id].expand(paddle.shape(log_psi)[1])
            if scoring_idmap is not None:
                return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max
            else:
@@ -107,7 +107,7 @@
 
        """
        logp = self.ctc.log_softmax(x.unsqueeze(0))  # assuming batch_size = 1
-        xlen = paddle.to_tensor([logp.size(1)])
+        xlen = paddle.to_tensor([paddle.shape(logp)[1]])
        self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)
        return None
 
diff --git a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
index 78b8fe36..d8ca5ccd 100644
--- a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
@@ -33,9 +33,9 @@
        self.logzero = -10000000000.0
        self.blank = blank
        self.eos = eos
-        self.batch = x.size(0)
-        self.input_length = x.size(1)
-        self.odim = x.size(2)
+        self.batch = paddle.shape(x)[0]
+        self.input_length = paddle.shape(x)[1]
+        self.odim = paddle.shape(x)[2]
        self.dtype = x.dtype
 
        # Pad the rest of posteriors in the batch
@@ -76,8 +76,7 @@
        last_ids = [yi[-1] for yi in y]  # last output label ids
        n_bh = len(last_ids)  # batch * hyps
        n_hyps = n_bh // self.batch  # assuming each utterance has the same # of hyps
-        self.scoring_num = scoring_ids.size(
-            -1) if scoring_ids is not None else 0
+        self.scoring_num = paddle.shape(scoring_ids)[-1] if scoring_ids is not None else 0
        # prepare state info
        if state is None:
            r_prev = paddle.full(
@@ -153,7 +152,7 @@
 
        # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
        for t in range(start, end):
-            rp = r[t - 1]  # (2 x BW x O') 
+            rp = r[t - 1]  # (2 x BW x O')
            rr = paddle.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view(
                2, 2, n_bh, snum)  # (2,2,BW,O')
            r[t] = paddle.logsumexp(rr, 1) + x_[:, t]
@@ -227,7 +226,7 @@
        if self.x.shape[1] < x.shape[1]:  # self.x (2,T,B,O); x (B,T,O)
            # Pad the rest of posteriors in the batch
            # TODO(takaaki-hori): need a better way without for-loops
-            xlens = [x.size(1)]
+            xlens = [paddle.shape(x)[1]]
            for i, l in enumerate(xlens):
                if l < self.input_length:
                    x[i, l:, :] = self.logzero
@@ -237,7 +236,7 @@
            xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
            self.x = paddle.stack([xn, xb])  # (2, T, B, O)
            self.x[:, :tmp_x.shape[1], :, :] = tmp_x
-            self.input_length = x.size(1)
+            self.input_length = paddle.shape(x)[1]
            self.end_frames = paddle.to_tensor(xlens) - 1
 
    def extend_state(self, state):
@@ -318,16 +317,16 @@
            r[0, 0] = xs[0]
            r[0, 1] = self.logzero
        else:
-            # Although the code does not exactly follow Algorithm 2, 
-            # we don't have to change it because we can assume 
-            # r_t(h)=0 for t < |h| in CTC forward computation 
+            # Although the code does not exactly follow Algorithm 2,
+            # we don't have to change it because we can assume
+            # r_t(h)=0 for t < |h| in CTC forward computation
            # (Note: we assume here that index t starts with 0).
            # The purpose of this difference is to reduce the number of for-loops.
            # https://github.com/espnet/espnet/pull/3655
-            # where we start to accumulate r_t(h) from t=|h| 
-            # and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1, 
+            # where we start to accumulate r_t(h) from t=|h|
+            # and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1,
            # avoiding accumulating zeros for t=1~|h|-1.
-            # Thus, we need to set r_{|h|-1}(h) = 0, 
+            # Thus, we need to set r_{|h|-1}(h) = 0,
            # i.e., r[output_length-1] = logzero, for initialization.
            # This is just for reducing the computation.
            r[output_length - 1] = self.logzero
diff --git a/paddlespeech/s2t/models/lm/transformer.py b/paddlespeech/s2t/models/lm/transformer.py
index 85bd7c23..d14f9956 100644
--- a/paddlespeech/s2t/models/lm/transformer.py
+++ b/paddlespeech/s2t/models/lm/transformer.py
@@ -90,7 +90,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
 
    def _target_mask(self, ys_in_pad):
        ys_mask = ys_in_pad != 0
-        m = subsequent_mask(ys_mask.size(-1)).unsqueeze(0)
+        m = subsequent_mask(paddle.shape(ys_mask)[-1]).unsqueeze(0)
        return ys_mask.unsqueeze(-2) & m
 
    def forward(self, x: paddle.Tensor, t: paddle.Tensor
@@ -112,7 +112,7 @@
            in perplexity: p(t)^{-n} = exp(-log p(t) / n)
 
        """
-        batch_size = x.size(0)
+        batch_size = paddle.shape(x)[0]
        xm = x != 0
        xlen = xm.sum(axis=1)
        if self.embed_drop is not None:
@@ -122,7 +122,7 @@
        h, _ = self.encoder(emb, xlen)
        y = self.decoder(h)
        loss = F.cross_entropy(
-            y.view(-1, y.shape[-1]), t.view(-1), reduction="none")
+            y.view(-1, paddle.shape(y)[-1]), t.view(-1), reduction="none")
        mask = xm.to(loss.dtype)
        logp = loss * mask.view(-1)
        nll = logp.view(batch_size, -1).sum(-1)
diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index 530840d0..d5471369 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -775,7 +775,7 @@ class U2DecodeModel(U2BaseModel):
        """
        self.eval()
        x = paddle.to_tensor(x).unsqueeze(0)
-        ilen = x.size(1)
+        ilen = paddle.shape(x)[1]
        enc_output, _ = self._forward_encoder(x, ilen)
        return enc_output.squeeze(0)
 
diff --git a/paddlespeech/s2t/modules/decoder.py b/paddlespeech/s2t/modules/decoder.py
index 42ac119b..ccc8482d 100644
--- a/paddlespeech/s2t/modules/decoder.py
+++ b/paddlespeech/s2t/modules/decoder.py
@@ -242,7 +242,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
        ]
 
        # batch decoding
-        ys_mask = subsequent_mask(ys.size(-1)).unsqueeze(0)  # (B,L,L)
+        ys_mask = subsequent_mask(paddle.shape(ys)[-1]).unsqueeze(0)  # (B,L,L)
        xs_mask = make_xs_mask(xs).unsqueeze(1)  # (B,1,T)
        logp, states = self.forward_one_step(
            xs, xs_mask, ys, ys_mask, cache=batch_state)
diff --git a/paddlespeech/s2t/modules/embedding.py b/paddlespeech/s2t/modules/embedding.py
index 596f61b7..51e558eb 100644
--- a/paddlespeech/s2t/modules/embedding.py
+++ b/paddlespeech/s2t/modules/embedding.py
@@ -115,7 +115,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
        assert offset + x.shape[
            1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                offset, x.shape[1], self.max_len)
-        #TODO(Hui Zhang): using T = x.size(1), __getitem__ not support Tensor
+        #TODO(Hui Zhang): using T = paddle.shape(x)[1], __getitem__ not support Tensor
        pos_emb = self.pe[:, offset:offset + T]
        x = x * self.xscale + pos_emb
        return self.dropout(x), self.dropout(pos_emb)
@@ -165,6 +165,6 @@
            1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                offset, x.shape[1], self.max_len)
        x = x * self.xscale
-        #TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor
+        #TODO(Hui Zhang): using paddle.shape(x)[1], __getitem__ not support Tensor
        pos_emb = self.pe[:, offset:offset + x.shape[1]]
        return self.dropout(x), self.dropout(pos_emb)
diff --git a/paddlespeech/s2t/modules/encoder.py b/paddlespeech/s2t/modules/encoder.py
index 669a12d6..4d31acf1 100644
--- a/paddlespeech/s2t/modules/encoder.py
+++ b/paddlespeech/s2t/modules/encoder.py
@@ -218,7 +218,7 @@ class BaseEncoder(nn.Layer):
        assert xs.shape[0] == 1  # batch size must be one
        # tmp_masks is just for interface compatibility
        # TODO(Hui Zhang): stride_slice not support bool tensor
-        # tmp_masks = paddle.ones([1, xs.size(1)], dtype=paddle.bool)
+        # tmp_masks = paddle.ones([1, paddle.shape(xs)[1]], dtype=paddle.bool)
        tmp_masks = paddle.ones([1, xs.shape[1]], dtype=paddle.int32)
        tmp_masks = tmp_masks.unsqueeze(1)  #[B=1, C=1, T]
 
diff --git a/paddlespeech/s2t/utils/tensor_utils.py b/paddlespeech/s2t/utils/tensor_utils.py
index 0dbaa0b6..bc557b13 100644
--- a/paddlespeech/s2t/utils/tensor_utils.py
+++ b/paddlespeech/s2t/utils/tensor_utils.py
@@ -58,7 +58,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
        >>> a = paddle.ones(25, 300)
        >>> b = paddle.ones(22, 300)
        >>> c = paddle.ones(15, 300)
-        >>> pad_sequence([a, b, c]).size()
+        >>> pad_sequence([a, b, c]).shape
        paddle.Tensor([25, 3, 300])
 
    Note:
@@ -79,10 +79,10 @@
 
    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
-    max_size = sequences[0].size()
+    max_size = paddle.shape(sequences[0])
    # (TODO Hui Zhang): slice not supprot `end==start`
    # trailing_dims = max_size[1:]
-    trailing_dims = max_size[1:] if max_size.ndim >= 2 else ()
+    trailing_dims = tuple(max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
    max_len = max([s.shape[0] for s in sequences])
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
@@ -99,7 +99,7 @@
        if batch_first:
            # TODO (Hui Zhang): set_value op not supprot `end==start`
            # TODO (Hui Zhang): set_value op not support int16
-            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] 
+            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
            # out_tensor[i, :length, ...] = tensor
            if length != 0:
                out_tensor[i, :length] = tensor
@@ -145,7 +145,7 @@ def add_sos_eos(ys_pad: paddle.Tensor, sos: int, eos: int,
                [ 4, 5, 6, 11, -1, -1],
                [ 7, 8, 9, 11, -1, -1]])
    """
-    # TODO(Hui Zhang): using comment code, 
+    # TODO(Hui Zhang): using comment code,
    #_sos = paddle.to_tensor(
    #    [sos], dtype=paddle.long, stop_gradient=True, place=ys_pad.place)
    #_eos = paddle.to_tensor(