|
|
@ -89,7 +89,7 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
|
|
|
|
self.max_len = max_len
|
|
|
|
self.max_len = max_len
|
|
|
|
self.xscale = paddle.to_tensor(math.sqrt(self.d_model))
|
|
|
|
self.xscale = paddle.to_tensor(math.sqrt(self.d_model))
|
|
|
|
self.dropout = nn.Dropout(p=dropout_rate)
|
|
|
|
self.dropout = nn.Dropout(p=dropout_rate)
|
|
|
|
self.pe = paddle.zeros([self.max_len, self.d_model]) #[T,D]
|
|
|
|
self.pe = paddle.zeros([1, self.max_len, self.d_model]) #[B=1,T,D]
|
|
|
|
|
|
|
|
|
|
|
|
position = paddle.arange(
|
|
|
|
position = paddle.arange(
|
|
|
|
0, self.max_len, dtype=paddle.float32).unsqueeze(1) #[T, 1]
|
|
|
|
0, self.max_len, dtype=paddle.float32).unsqueeze(1) #[T, 1]
|
|
|
@ -97,9 +97,8 @@ class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
|
|
|
|
paddle.arange(0, self.d_model, 2, dtype=paddle.float32) *
|
|
|
|
paddle.arange(0, self.d_model, 2, dtype=paddle.float32) *
|
|
|
|
-(math.log(10000.0) / self.d_model))
|
|
|
|
-(math.log(10000.0) / self.d_model))
|
|
|
|
|
|
|
|
|
|
|
|
self.pe[:, 0::2] = paddle.sin(position * div_term)
|
|
|
|
self.pe[:, :, 0::2] = paddle.sin(position * div_term)
|
|
|
|
self.pe[:, 1::2] = paddle.cos(position * div_term)
|
|
|
|
self.pe[:, :, 1::2] = paddle.cos(position * div_term)
|
|
|
|
self.pe = self.pe.unsqueeze(0) #[1, T, D]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def forward(self, x: paddle.Tensor,
|
|
|
|
def forward(self, x: paddle.Tensor,
|
|
|
|
offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
|
|
|
|
offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
|
|
|
|