From 62bac0a1d1b38fd62fbef3ba311d462f44dfaea3 Mon Sep 17 00:00:00 2001 From: liyulingyue <852433440@qq.com> Date: Tue, 3 Dec 2024 20:29:04 +0800 Subject: [PATCH] fix aishell3-vc0: cast enc_hs_len to att_prev dtype in AttLoc; unsqueeze 0-D ilens in Encoder --- paddlespeech/t2s/modules/tacotron2/attentions.py | 4 +++- paddlespeech/t2s/modules/tacotron2/encoder.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/paddlespeech/t2s/modules/tacotron2/attentions.py b/paddlespeech/t2s/modules/tacotron2/attentions.py index 5d1a24845..1676fc330 100644 --- a/paddlespeech/t2s/modules/tacotron2/attentions.py +++ b/paddlespeech/t2s/modules/tacotron2/attentions.py @@ -171,7 +171,9 @@ class AttLoc(nn.Layer): if paddle.sum(att_prev) == 0: # if no bias, 0 0-pad goes 0 att_prev = 1.0 - make_pad_mask(enc_hs_len) - att_prev = att_prev / enc_hs_len.unsqueeze(-1) + unsqueeze_enc_hs_len = enc_hs_len.unsqueeze(-1).astype( + att_prev.dtype) + att_prev = att_prev / unsqueeze_enc_hs_len # att_prev: (utt, frame) -> (utt, 1, 1, frame) # -> (utt, att_conv_chans, 1, frame) diff --git a/paddlespeech/t2s/modules/tacotron2/encoder.py b/paddlespeech/t2s/modules/tacotron2/encoder.py index 224c82400..7683def83 100644 --- a/paddlespeech/t2s/modules/tacotron2/encoder.py +++ b/paddlespeech/t2s/modules/tacotron2/encoder.py @@ -162,6 +162,8 @@ class Encoder(nn.Layer): return xs.transpose([0, 2, 1]) if not isinstance(ilens, paddle.Tensor): ilens = paddle.to_tensor(ilens) + if ilens.ndim == 0: + ilens = ilens.unsqueeze(0) xs = xs.transpose([0, 2, 1]) # for dygraph to static graph # self.blstm.flatten_parameters()