remove size,test=asr

3 years ago · e1888f9ae6
parent 1cdd41bd03
commit e1888f9ae6
8 changed files with 27 additions and 47 deletions
--- a/paddlespeech/s2t/init.py
+++ b/paddlespeech/s2t/init.py
@ -189,25 +189,6 @@ if not hasattr(paddle.Tensor, 'contiguous'):
    paddle.static.Variable.contiguous = contiguous


-def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
-    nargs = len(args)
-    assert (nargs <= 1)
-    s = paddle.shape(xs)
-    if nargs == 1:
-        return s[args[0]]
-    else:
-        return s
-
-
-#`to_static` do not process `size` property, maybe some `paddle` api dependent on it.
-logger.debug(
-    "override size of paddle.Tensor "
-    "(`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!"
-)
-paddle.Tensor.size = size
-paddle.static.Variable.size = size
-
-
 def view(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
    return xs.reshape(args)

--- a/paddlespeech/s2t/decoders/beam_search/beam_search.py
+++ b/paddlespeech/s2t/decoders/beam_search/beam_search.py
@ -231,7 +231,7 @@ class BeamSearch(paddle.nn.Layer):

        """
        # no pre beam performed, `ids` equal to `weighted_scores`
-        if weighted_scores.size(0) == ids.size(0):
+        if weighted_scores.shape[0] == ids.shape[0]:
            top_ids = weighted_scores.topk(
                self.beam_size)[1]  # index in n_vocab
            return top_ids, top_ids
@ -374,8 +374,8 @@ class BeamSearch(paddle.nn.Layer):
        elif maxlenratio < 0:
            maxlen = -1 * int(maxlenratio)
        else:
-            maxlen = max(1, int(maxlenratio * x.size(0)))
-        minlen = int(minlenratio * x.size(0))
+            maxlen = max(1, int(maxlenratio * x.shape[0]))
+        minlen = int(minlenratio * x.shape[0])
        logger.info("decoder input length: " + str(x.shape[0]))
        logger.info("max output length: " + str(maxlen))
        logger.info("min output length: " + str(minlen))
--- a/paddlespeech/s2t/decoders/scorers/ctc.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc.py
@ -69,7 +69,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
                return sc[i], st[i]
            else:  # for CTCPrefixScorePD (need new_id > 0)
                r, log_psi, f_min, f_max, scoring_idmap = state
-                s = log_psi[i, new_id].expand(log_psi.size(1))
+                s = log_psi[i, new_id].expand(log_psi.shape[1])
                if scoring_idmap is not None:
                    return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max
                else:
@ -107,7 +107,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):

        """
        logp = self.ctc.log_softmax(x.unsqueeze(0))  # assuming batch_size = 1
-        xlen = paddle.to_tensor([logp.size(1)])
+        xlen = paddle.to_tensor([logp.shape[1]])
        self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)
        return None

--- a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
@ -33,9 +33,9 @@ class CTCPrefixScorePD():
        self.logzero = -10000000000.0
        self.blank = blank
        self.eos = eos
-        self.batch = x.size(0)
-        self.input_length = x.size(1)
-        self.odim = x.size(2)
+        self.batch = x.shape[0]
+        self.input_length = x.shape[1]
+        self.odim = x.shape[2]
        self.dtype = x.dtype

        # Pad the rest of posteriors in the batch
@ -76,8 +76,7 @@ class CTCPrefixScorePD():
        last_ids = [yi[-1] for yi in y]  # last output label ids
        n_bh = len(last_ids)  # batch * hyps
        n_hyps = n_bh // self.batch  # assuming each utterance has the same # of hyps
-        self.scoring_num = scoring_ids.size(
-            -1) if scoring_ids is not None else 0
+        self.scoring_num = scoring_ids.shape[-1] if scoring_ids is not None else 0
        # prepare state info
        if state is None:
            r_prev = paddle.full(
@ -227,7 +226,7 @@ class CTCPrefixScorePD():
        if self.x.shape[1] < x.shape[1]:  # self.x (2,T,B,O); x (B,T,O)
            # Pad the rest of posteriors in the batch
            # TODO(takaaki-hori): need a better way without for-loops
-            xlens = [x.size(1)]
+            xlens = [x.shape[1]]
            for i, l in enumerate(xlens):
                if l < self.input_length:
                    x[i, l:, :] = self.logzero
@ -237,7 +236,7 @@ class CTCPrefixScorePD():
            xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
            self.x = paddle.stack([xn, xb])  # (2, T, B, O)
            self.x[:, :tmp_x.shape[1], :, :] = tmp_x
-            self.input_length = x.size(1)
+            self.input_length = x.shape[1]
            self.end_frames = paddle.to_tensor(xlens) - 1

    def extend_state(self, state):
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@ -775,7 +775,7 @@ class U2DecodeModel(U2BaseModel):
        """
        self.eval()
        x = paddle.to_tensor(x).unsqueeze(0)
-        ilen = x.size(1)
+        ilen = x.shape[1]
        enc_output, _ = self._forward_encoder(x, ilen)
        return enc_output.squeeze(0)

--- a/paddlespeech/s2t/modules/decoder.py
+++ b/paddlespeech/s2t/modules/decoder.py
@ -242,7 +242,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
            ]

        # batch decoding
-        ys_mask = subsequent_mask(ys.size(-1)).unsqueeze(0)  # (B,L,L)
+        ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0)  # (B,L,L)
        xs_mask = make_xs_mask(xs).unsqueeze(1)  # (B,1,T)
        logp, states = self.forward_one_step(
            xs, xs_mask, ys, ys_mask, cache=batch_state)
--- a/paddlespeech/s2t/modules/embedding.py
+++ b/paddlespeech/s2t/modules/embedding.py
@ -115,7 +115,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
        assert offset + x.shape[
            1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                offset, x.shape[1], self.max_len)
-        #TODO(Hui Zhang): using T = x.size(1), __getitem__ not support Tensor
+        #TODO(Hui Zhang): using T = x.shape[1], because __getitem__ does not support Tensor indices
        pos_emb = self.pe[:, offset:offset + T]
        x = x * self.xscale + pos_emb
        return self.dropout(x), self.dropout(pos_emb)
@ -165,6 +165,6 @@ class RelPositionalEncoding(PositionalEncoding):
            1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                offset, x.shape[1], self.max_len)
        x = x * self.xscale
-        #TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor
+        #TODO(Hui Zhang): using x.shape[1], because __getitem__ does not support Tensor indices
        pos_emb = self.pe[:, offset:offset + x.shape[1]]
        return self.dropout(x), self.dropout(pos_emb)
--- a/paddlespeech/s2t/utils/tensor_utils.py
+++ b/paddlespeech/s2t/utils/tensor_utils.py
@ -58,8 +58,8 @@ def pad_sequence(sequences: List[paddle.Tensor],
        >>> a = paddle.ones(25, 300)
        >>> b = paddle.ones(22, 300)
        >>> c = paddle.ones(15, 300)
-        >>> pad_sequence([a, b, c]).size()
-        paddle.Tensor([25, 3, 300])
+        >>> pad_sequence([a, b, c]).shape
+        [25, 3, 300]

    Note:
        This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],

    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
-    max_size = sequences[0].size()
+    max_size = paddle.shape(sequences[0])  # keep a Tensor: `.shape` is a plain list with no `.numpy()` (used below)
    # (TODO Hui Zhang): slice not supprot `end==start`
    # trailing_dims = max_size[1:]
    trailing_dims = tuple(max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()