diff --git a/paddlespeech/t2s/modules/diffnet.py b/paddlespeech/t2s/modules/diffnet.py index 2f433ad68..98be9a9a3 100644 --- a/paddlespeech/t2s/modules/diffnet.py +++ b/paddlespeech/t2s/modules/diffnet.py @@ -121,6 +121,7 @@ class SinusoidalPosEmb(nn.Layer): def forward(self, x: paddle.Tensor): x = paddle.cast(x, 'float32') + x = x.unsqueeze(0) half_dim = self.dim // 2 emb = math.log(10000) / (half_dim - 1) emb = paddle.exp(paddle.arange(half_dim) * -emb) diff --git a/paddlespeech/t2s/modules/nets_utils.py b/paddlespeech/t2s/modules/nets_utils.py index 57c46e3a8..d34c579b3 100644 --- a/paddlespeech/t2s/modules/nets_utils.py +++ b/paddlespeech/t2s/modules/nets_utils.py @@ -180,7 +180,7 @@ def make_pad_mask(lengths, xs=None, length_dim=-1): """ if length_dim == 0: raise ValueError("length_dim cannot be 0: {}".format(length_dim)) - + lengths = lengths.unsqueeze(0) bs = paddle.shape(lengths) if xs is None: maxlen = paddle.cast(lengths.max(), dtype=bs.dtype)