From e1888f9ae6d239b8c28f9739f7fd2a0120caac9e Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Tue, 24 May 2022 12:37:42 +0000 Subject: [PATCH] remove size,test=asr --- paddlespeech/s2t/__init__.py | 19 ------------- .../s2t/decoders/beam_search/beam_search.py | 10 +++---- paddlespeech/s2t/decoders/scorers/ctc.py | 4 +-- .../s2t/decoders/scorers/ctc_prefix_score.py | 27 +++++++++---------- paddlespeech/s2t/models/u2/u2.py | 2 +- paddlespeech/s2t/modules/decoder.py | 2 +- paddlespeech/s2t/modules/embedding.py | 4 +-- paddlespeech/s2t/utils/tensor_utils.py | 6 ++--- 8 files changed, 27 insertions(+), 47 deletions(-) diff --git a/paddlespeech/s2t/__init__.py b/paddlespeech/s2t/__init__.py index 2365071f..7ec9e1ab 100644 --- a/paddlespeech/s2t/__init__.py +++ b/paddlespeech/s2t/__init__.py @@ -189,25 +189,6 @@ if not hasattr(paddle.Tensor, 'contiguous'): paddle.static.Variable.contiguous = contiguous -def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor: - nargs = len(args) - assert (nargs <= 1) - s = paddle.shape(xs) - if nargs == 1: - return s[args[0]] - else: - return s - - -#`to_static` do not process `size` property, maybe some `paddle` api dependent on it. -logger.debug( - "override size of paddle.Tensor " - "(`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!" -) -paddle.Tensor.size = size -paddle.static.Variable.size = size - - def view(xs: paddle.Tensor, *args: int) -> paddle.Tensor: return xs.reshape(args) diff --git a/paddlespeech/s2t/decoders/beam_search/beam_search.py b/paddlespeech/s2t/decoders/beam_search/beam_search.py index f331cb1c..5029e157 100644 --- a/paddlespeech/s2t/decoders/beam_search/beam_search.py +++ b/paddlespeech/s2t/decoders/beam_search/beam_search.py @@ -194,7 +194,7 @@ class BeamSearch(paddle.nn.Layer): Args: hyp (Hypothesis): Hypothesis with prefix tokens to score - ids (paddle.Tensor): 1D tensor of new partial tokens to score, + ids (paddle.Tensor): 1D tensor of new partial tokens to score, len(ids) < n_vocab x (paddle.Tensor): Corresponding input feature, (T, D) @@ -224,14 +224,14 @@ class BeamSearch(paddle.nn.Layer): ids (paddle.Tensor): The partial token ids(Global) to compute topk. Returns: - Tuple[paddle.Tensor, paddle.Tensor]: + Tuple[paddle.Tensor, paddle.Tensor]: The topk full token ids and partial token ids. Their shapes are `(self.beam_size,)`. i.e. (global ids, global relative local ids). 
""" # no pre beam performed, `ids` equal to `weighted_scores` - if weighted_scores.size(0) == ids.size(0): + if weighted_scores.shape[0] == ids.shape[0]: top_ids = weighted_scores.topk( self.beam_size)[1] # index in n_vocab return top_ids, top_ids @@ -374,8 +374,8 @@ class BeamSearch(paddle.nn.Layer): elif maxlenratio < 0: maxlen = -1 * int(maxlenratio) else: - maxlen = max(1, int(maxlenratio * x.size(0))) - minlen = int(minlenratio * x.size(0)) + maxlen = max(1, int(maxlenratio * x.shape[0])) + minlen = int(minlenratio * x.shape[0]) logger.info("decoder input length: " + str(x.shape[0])) logger.info("max output length: " + str(maxlen)) logger.info("min output length: " + str(minlen)) diff --git a/paddlespeech/s2t/decoders/scorers/ctc.py b/paddlespeech/s2t/decoders/scorers/ctc.py index 81d8b078..6f1d8c00 100644 --- a/paddlespeech/s2t/decoders/scorers/ctc.py +++ b/paddlespeech/s2t/decoders/scorers/ctc.py @@ -69,7 +69,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface): return sc[i], st[i] else: # for CTCPrefixScorePD (need new_id > 0) r, log_psi, f_min, f_max, scoring_idmap = state - s = log_psi[i, new_id].expand(log_psi.size(1)) + s = log_psi[i, new_id].expand(log_psi.shape[1]) if scoring_idmap is not None: return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max else: @@ -107,7 +107,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface): """ logp = self.ctc.log_softmax(x.unsqueeze(0)) # assuming batch_size = 1 - xlen = paddle.to_tensor([logp.size(1)]) + xlen = paddle.to_tensor([logp.shape[1]]) self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos) return None diff --git a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py index 78b8fe36..0e63a52a 100644 --- a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py +++ b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py @@ -33,9 +33,9 @@ class CTCPrefixScorePD(): self.logzero = -10000000000.0 self.blank = blank self.eos = eos - self.batch = x.size(0) - self.input_length = x.size(1) - self.odim = x.size(2) + self.batch = x.shape[0] + self.input_length = x.shape[1] + self.odim = x.shape[2] self.dtype = x.dtype # Pad the rest of posteriors in the batch @@ -76,8 +76,7 @@ class CTCPrefixScorePD(): last_ids = [yi[-1] for yi in y] # last output label ids n_bh = len(last_ids) # batch * hyps n_hyps = n_bh // self.batch # assuming each utterance has the same # of hyps - self.scoring_num = scoring_ids.size( - -1) if scoring_ids is not None else 0 + self.scoring_num = scoring_ids.shape[-1] if scoring_ids is not None else 0 # prepare state info if state is None: r_prev = paddle.full( @@ -153,7 +152,7 @@ class CTCPrefixScorePD(): # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h)) for t in range(start, end): - rp = r[t - 1] # (2 x BW x O') + rp = r[t - 1] # (2 x BW x O') rr = paddle.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view( 2, 2, n_bh, snum) # (2,2,BW,O') r[t] = paddle.logsumexp(rr, 1) + x_[:, t] @@ -227,7 +226,7 @@ class CTCPrefixScorePD(): if self.x.shape[1] < x.shape[1]: # self.x (2,T,B,O); x (B,T,O) # Pad the rest of posteriors in the batch # TODO(takaaki-hori): need a better way without for-loops - xlens = [x.size(1)] + xlens = [x.shape[1]] for i, l in enumerate(xlens): if l < self.input_length: x[i, l:, :] = self.logzero @@ -237,7 +236,7 @@ class CTCPrefixScorePD(): xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) self.x = paddle.stack([xn, xb]) # (2, T, B, O) self.x[:, :tmp_x.shape[1], :, :] = tmp_x - self.input_length = x.size(1) + 
self.input_length = x.shape[1] self.end_frames = paddle.to_tensor(xlens) - 1 def extend_state(self, state): @@ -318,16 +317,16 @@ class CTCPrefixScore(): r[0, 0] = xs[0] r[0, 1] = self.logzero else: - # Although the code does not exactly follow Algorithm 2, - # we don't have to change it because we can assume - # r_t(h)=0 for t < |h| in CTC forward computation + # Although the code does not exactly follow Algorithm 2, + # we don't have to change it because we can assume + # r_t(h)=0 for t < |h| in CTC forward computation # (Note: we assume here that index t starts with 0). # The purpose of this difference is to reduce the number of for-loops. # https://github.com/espnet/espnet/pull/3655 - # where we start to accumulate r_t(h) from t=|h| - # and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1, + # where we start to accumulate r_t(h) from t=|h| + # and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1, # avoiding accumulating zeros for t=1~|h|-1. - # Thus, we need to set r_{|h|-1}(h) = 0, + # Thus, we need to set r_{|h|-1}(h) = 0, # i.e., r[output_length-1] = logzero, for initialization. # This is just for reducing the computation. r[output_length - 1] = self.logzero diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 530840d0..e3f46b15 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -775,7 +775,7 @@ class U2DecodeModel(U2BaseModel): """ self.eval() x = paddle.to_tensor(x).unsqueeze(0) - ilen = x.size(1) + ilen = x.shape[1] enc_output, _ = self._forward_encoder(x, ilen) return enc_output.squeeze(0) diff --git a/paddlespeech/s2t/modules/decoder.py b/paddlespeech/s2t/modules/decoder.py index 42ac119b..ce78059c 100644 --- a/paddlespeech/s2t/modules/decoder.py +++ b/paddlespeech/s2t/modules/decoder.py @@ -242,7 +242,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer): ] # batch decoding - ys_mask = subsequent_mask(ys.size(-1)).unsqueeze(0) # (B,L,L) + ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0) # (B,L,L) xs_mask = make_xs_mask(xs).unsqueeze(1) # (B,1,T) logp, states = self.forward_one_step( xs, xs_mask, ys, ys_mask, cache=batch_state) diff --git a/paddlespeech/s2t/modules/embedding.py b/paddlespeech/s2t/modules/embedding.py index 596f61b7..cc1fdffe 100644 --- a/paddlespeech/s2t/modules/embedding.py +++ b/paddlespeech/s2t/modules/embedding.py @@ -115,7 +115,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface): assert offset + x.shape[ 1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format( offset, x.shape[1], self.max_len) - #TODO(Hui Zhang): using T = x.size(1), __getitem__ not support Tensor + #TODO(Hui Zhang): using T = x.shape[1], __getitem__ not support Tensor pos_emb = self.pe[:, offset:offset + T] x = x * self.xscale + pos_emb return self.dropout(x), self.dropout(pos_emb) @@ -165,6 +165,6 @@ class RelPositionalEncoding(PositionalEncoding): 1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format( offset, x.shape[1], self.max_len) x = x * self.xscale - #TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor + #TODO(Hui Zhang): using x.shape[1], __getitem__ not support Tensor pos_emb = self.pe[:, offset:offset + x.shape[1]] return self.dropout(x), self.dropout(pos_emb) diff --git a/paddlespeech/s2t/utils/tensor_utils.py b/paddlespeech/s2t/utils/tensor_utils.py index e105253c..ca868956 100644 --- a/paddlespeech/s2t/utils/tensor_utils.py +++ b/paddlespeech/s2t/utils/tensor_utils.py @@ -58,8 +58,8 @@ def pad_sequence(sequences: 
List[paddle.Tensor], >>> a = paddle.ones(25, 300) >>> b = paddle.ones(22, 300) >>> c = paddle.ones(15, 300) - >>> pad_sequence([a, b, c]).size() - paddle.Tensor([25, 3, 300]) + >>> pad_sequence([a, b, c]).shape + [25, 3, 300] Note: This function returns a Tensor of size ``T x B x *`` or ``B x T x *`` @@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # assuming trailing dimensions and type of all the Tensors # in sequences are same and fetching those from sequences[0] - max_size = sequences[0].size() + max_size = sequences[0].shape # (TODO Hui Zhang): slice not supprot `end==start` # trailing_dims = max_size[1:] trailing_dims = tuple(max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
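
A minimal sketch (not part of the patch) of the pattern this change applies throughout: the PyTorch-style `x.size(i)` accessor, previously supplied by the monkey patch removed from `paddlespeech/s2t/__init__.py`, is replaced with Paddle's native `x.shape[i]`; `paddle.shape(x)` remains available when a Tensor-valued shape is needed, e.g. under `paddle.jit.to_static`. The tensor values below are illustrative only.

    import paddle

    x = paddle.zeros([4, 80, 512])

    # Before this patch (relied on the removed paddle.Tensor.size monkey patch):
    #   time_steps = x.size(1)
    # After: use the native shape property (a plain Python list in dygraph mode).
    time_steps = x.shape[1]     # -> 80
    full_shape = x.shape        # -> [4, 80, 512]

    # When a Tensor-valued shape is required (e.g. in graphs traced by
    # paddle.jit.to_static), paddle.shape(x) returns the shape as a Tensor
    # instead of a Python list, so indexing it stays inside the graph.
    dyn_shape = paddle.shape(x)
    dyn_time = dyn_shape[1]
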
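A small usage sketch for the updated `pad_sequence` doctest, assuming `paddlespeech.s2t.utils.tensor_utils.pad_sequence` mirrors the `torch.nn.utils.rnn.pad_sequence` signature (default `batch_first=False`, `padding_value=0.0`); illustrative only, not part of the patch.

    import paddle
    from paddlespeech.s2t.utils.tensor_utils import pad_sequence

    # Three utterances of different lengths, 300 features per frame.
    # Note: paddle.ones takes the shape as a list, e.g. paddle.ones([25, 300]).
    a = paddle.ones([25, 300])
    b = paddle.ones([22, 300])
    c = paddle.ones([15, 300])

    padded = pad_sequence([a, b, c])   # default batch_first=False -> (T, B, *)
    print(padded.shape)                # [25, 3, 300], a plain Python list
    print(pad_sequence([a, b, c], batch_first=True).shape)  # [3, 25, 300]
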