remove size,test=asr

pull/1950/head
huangyuxin 2 years ago
parent 1cdd41bd03
commit e1888f9ae6

@@ -189,25 +189,6 @@ if not hasattr(paddle.Tensor, 'contiguous'):
paddle.static.Variable.contiguous = contiguous
def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
nargs = len(args)
assert (nargs <= 1)
s = paddle.shape(xs)
if nargs == 1:
return s[args[0]]
else:
return s
#`to_static` do not process `size` property, maybe some `paddle` api dependent on it.
logger.debug(
"override size of paddle.Tensor "
"(`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!"
)
paddle.Tensor.size = size
paddle.static.Variable.size = size
def view(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
return xs.reshape(args)
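
Note: the hunk above drops the torch-style `size()` monkey patch, so call sites read shapes directly. A minimal sketch (toy tensor, not part of the patch) of the replacement pattern:

import paddle

x = paddle.ones([4, 8])
batch = x.shape[0]           # 4, previously x.size(0) via the removed override
feat = x.shape[1]            # 8
dyn_shape = paddle.shape(x)  # shape as a Tensor, usable inside to_static graphs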

@@ -194,7 +194,7 @@ class BeamSearch(paddle.nn.Layer):
Args:
hyp (Hypothesis): Hypothesis with prefix tokens to score
ids (paddle.Tensor): 1D tensor of new partial tokens to score,
len(ids) < n_vocab
x (paddle.Tensor): Corresponding input feature, (T, D)
@@ -224,14 +224,14 @@ class BeamSearch(paddle.nn.Layer):
ids (paddle.Tensor): The partial token ids(Global) to compute topk.
Returns:
Tuple[paddle.Tensor, paddle.Tensor]:
The topk full token ids and partial token ids.
Their shapes are `(self.beam_size,)`.
i.e. (global ids, global relative local ids).
"""
# no pre beam performed, `ids` equal to `weighted_scores`
if weighted_scores.size(0) == ids.size(0):
if weighted_scores.shape[0] == ids.shape[0]:
top_ids = weighted_scores.topk(
self.beam_size)[1] # index in n_vocab
return top_ids, top_ids
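
Note: a minimal sketch (toy scores, not part of the patch) of the top-k selection above; `Tensor.topk` returns a (values, indices) pair and `[1]` keeps only the indices:

import paddle

beam_size = 3
weighted_scores = paddle.to_tensor([0.1, 0.7, 0.2, 0.9, 0.4])
top_ids = weighted_scores.topk(beam_size)[1]  # indices of the 3 best scores, e.g. [3, 1, 4]
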
@@ -374,8 +374,8 @@ class BeamSearch(paddle.nn.Layer):
elif maxlenratio < 0:
maxlen = -1 * int(maxlenratio)
else:
maxlen = max(1, int(maxlenratio * x.size(0)))
minlen = int(minlenratio * x.size(0))
maxlen = max(1, int(maxlenratio * x.shape[0]))
minlen = int(minlenratio * x.shape[0])
logger.info("decoder input length: " + str(x.shape[0]))
logger.info("max output length: " + str(maxlen))
logger.info("min output length: " + str(minlen))

@@ -69,7 +69,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
return sc[i], st[i]
else: # for CTCPrefixScorePD (need new_id > 0)
r, log_psi, f_min, f_max, scoring_idmap = state
s = log_psi[i, new_id].expand(log_psi.size(1))
s = log_psi[i, new_id].expand(log_psi.shape[1])
if scoring_idmap is not None:
return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max
else:
@@ -107,7 +107,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
"""
logp = self.ctc.log_softmax(x.unsqueeze(0)) # assuming batch_size = 1
xlen = paddle.to_tensor([logp.size(1)])
xlen = paddle.to_tensor([logp.shape[1]])
self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)
return None

@@ -33,9 +33,9 @@ class CTCPrefixScorePD():
self.logzero = -10000000000.0
self.blank = blank
self.eos = eos
self.batch = x.size(0)
self.input_length = x.size(1)
self.odim = x.size(2)
self.batch = x.shape[0]
self.input_length = x.shape[1]
self.odim = x.shape[2]
self.dtype = x.dtype
# Pad the rest of posteriors in the batch
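
Note: since `Tensor.shape` is a plain Python list in dygraph mode, the three shape reads above could equally be tuple-unpacked; a minimal sketch with toy sizes:

import paddle

x = paddle.zeros([2, 50, 30])        # (B, T, O) CTC posteriors, toy sizes
batch, input_length, odim = x.shape  # .shape is a list, so it unpacks directly
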
@@ -76,8 +76,7 @@ class CTCPrefixScorePD():
last_ids = [yi[-1] for yi in y] # last output label ids
n_bh = len(last_ids) # batch * hyps
n_hyps = n_bh // self.batch # assuming each utterance has the same # of hyps
self.scoring_num = scoring_ids.size(
-1) if scoring_ids is not None else 0
self.scoring_num = scoring_ids.shape[-1] if scoring_ids is not None else 0
# prepare state info
if state is None:
r_prev = paddle.full(
@@ -153,7 +152,7 @@ class CTCPrefixScorePD():
# compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
for t in range(start, end):
rp = r[t - 1] # (2 x BW x O')
rr = paddle.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view(
2, 2, n_bh, snum) # (2,2,BW,O')
r[t] = paddle.logsumexp(rr, 1) + x_[:, t]
@@ -227,7 +226,7 @@ class CTCPrefixScorePD():
if self.x.shape[1] < x.shape[1]: # self.x (2,T,B,O); x (B,T,O)
# Pad the rest of posteriors in the batch
# TODO(takaaki-hori): need a better way without for-loops
xlens = [x.size(1)]
xlens = [x.shape[1]]
for i, l in enumerate(xlens):
if l < self.input_length:
x[i, l:, :] = self.logzero
@@ -237,7 +236,7 @@ class CTCPrefixScorePD():
xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
self.x = paddle.stack([xn, xb]) # (2, T, B, O)
self.x[:, :tmp_x.shape[1], :, :] = tmp_x
self.input_length = x.size(1)
self.input_length = x.shape[1]
self.end_frames = paddle.to_tensor(xlens) - 1
def extend_state(self, state):
@@ -318,16 +317,16 @@ class CTCPrefixScore():
r[0, 0] = xs[0]
r[0, 1] = self.logzero
else:
# Although the code does not exactly follow Algorithm 2,
# we don't have to change it because we can assume
# r_t(h)=0 for t < |h| in CTC forward computation
# (Note: we assume here that index t starts with 0).
# The purpose of this difference is to reduce the number of for-loops.
# https://github.com/espnet/espnet/pull/3655
# where we start to accumulate r_t(h) from t=|h|
# and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1,
# avoiding accumulating zeros for t=1~|h|-1.
# Thus, we need to set r_{|h|-1}(h) = 0,
# i.e., r[output_length-1] = logzero, for initialization.
# This is just for reducing the computation.
r[output_length - 1] = self.logzero

@@ -775,7 +775,7 @@ class U2DecodeModel(U2BaseModel):
"""
self.eval()
x = paddle.to_tensor(x).unsqueeze(0)
ilen = x.size(1)
ilen = x.shape[1]
enc_output, _ = self._forward_encoder(x, ilen)
return enc_output.squeeze(0)
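
Note: a minimal sketch (toy shapes, not part of the patch) of the single-utterance encode path above: a (T, D) feature matrix is batched to (1, T, D) and ilen is read from shape[1]:

import paddle

feat = paddle.randn([83, 80])   # (T, D) acoustic features, toy sizes
x = feat.unsqueeze(0)           # (1, T, D)
ilen = x.shape[1]               # 83, previously x.size(1)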

@@ -242,7 +242,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
]
# batch decoding
ys_mask = subsequent_mask(ys.size(-1)).unsqueeze(0) # (B,L,L)
ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0) # (B,L,L)
xs_mask = make_xs_mask(xs).unsqueeze(1) # (B,1,T)
logp, states = self.forward_one_step(
xs, xs_mask, ys, ys_mask, cache=batch_state)
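
Note: subsequent_mask here is the repo's own helper; presumably it builds a lower-triangular causal mask over the ys length. A hedged stand-in sketch, not the actual implementation:

import paddle

L = 5  # ys.shape[-1]
mask = paddle.tril(paddle.ones([L, L], dtype='int32')).astype('bool')  # row i attends to positions <= i
mask = mask.unsqueeze(0)  # (1, L, L), matching the unsqueeze(0) above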

@@ -115,7 +115,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
assert offset + x.shape[
1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
offset, x.shape[1], self.max_len)
#TODO(Hui Zhang): using T = x.size(1), __getitem__ not support Tensor
#TODO(Hui Zhang): using T = x.shape[1], __getitem__ not support Tensor
pos_emb = self.pe[:, offset:offset + T]
x = x * self.xscale + pos_emb
return self.dropout(x), self.dropout(pos_emb)
@@ -165,6 +165,6 @@ class RelPositionalEncoding(PositionalEncoding):
1] < self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
offset, x.shape[1], self.max_len)
x = x * self.xscale
#TODO(Hui Zhang): using x.size(1), __getitem__ not support Tensor
#TODO(Hui Zhang): using x.shape[1], __getitem__ not support Tensor
pos_emb = self.pe[:, offset:offset + x.shape[1]]
return self.dropout(x), self.dropout(pos_emb)
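
Note: a minimal sketch (toy sizes) of the positional-embedding lookup in both hunks above: the embedding table is (1, max_len, d_model) and a window of the input length is sliced out at the given offset:

import paddle

max_len, d_model, offset, T = 5000, 256, 0, 83
pe = paddle.zeros([1, max_len, d_model])  # stands in for self.pe
pos_emb = pe[:, offset:offset + T]        # (1, T, d_model)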

@@ -58,8 +58,8 @@ def pad_sequence(sequences: List[paddle.Tensor],
>>> a = paddle.ones(25, 300)
>>> b = paddle.ones(22, 300)
>>> c = paddle.ones(15, 300)
>>> pad_sequence([a, b, c]).size()
paddle.Tensor([25, 3, 300])
>>> pad_sequence([a, b, c]).shape
[25, 3, 300]
Note:
This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
@@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
# assuming trailing dimensions and type of all the Tensors
# in sequences are same and fetching those from sequences[0]
max_size = sequences[0].size()
max_size = sequences[0].shape
# (TODO Hui Zhang): slice not supprot `end==start`
# trailing_dims = max_size[1:]
trailing_dims = tuple(max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
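
Note: the docstring and max_size changes both follow from Paddle's dygraph behaviour: Tensor.shape is a plain Python list rather than a Size-like object, so it prints as [25, 3, 300] and supports ordinary indexing and slicing. A minimal sketch (using paddle.ones with its shape-list signature):

import paddle

a = paddle.ones([25, 300])
print(a.shape)      # [25, 300]  -- a Python list
print(a.shape[1:])  # [300]      -- ordinary list slicing for the trailing dims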
