diff --git a/paddlespeech/t2s/models/vits/text_encoder.py b/paddlespeech/t2s/models/vits/text_encoder.py
index 033472eb0..aa26c3c47 100644
--- a/paddlespeech/t2s/models/vits/text_encoder.py
+++ b/paddlespeech/t2s/models/vits/text_encoder.py
@@ -106,8 +106,5 @@ class TextEncoder(nn.Layer):
 
         # define modules
         self.emb = nn.Embedding(vocabs, attention_dim)
-        dist = paddle.distribution.Normal(loc=0.0, scale=attention_dim**-0.5)
-        w = dist.sample(self.emb.weight.shape)
-        self.emb.weight.set_value(w)
 
         self.encoder = Encoder(
@@ -169,7 +169,7 @@ class TextEncoder(nn.Layer):
         return x, m, logs, x_mask
 
     def reset_parameters(self):
-        normal_(self.emb.weight)
+        normal_(self.emb.weight, mean=0.0, std=self.attention_dim**-0.5)
         if self.emb._padding_idx is not None:
             with paddle.no_grad():
                 self.emb.weight[self.emb._padding_idx] = 0
\ No newline at end of file