|
|
@ -105,9 +105,7 @@ class TransformerEncoderLayer(nn.Layer):
|
|
|
|
if self.normalize_before:
|
|
|
|
if self.normalize_before:
|
|
|
|
x = self.norm1(x)
|
|
|
|
x = self.norm1(x)
|
|
|
|
|
|
|
|
|
|
|
|
x_att, new_att_cache = self.self_attn(
|
|
|
|
x_att, new_att_cache = self.self_attn(x, x, x, mask, cache=att_cache)
|
|
|
|
x, x, x, mask, cache=att_cache
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if self.concat_after:
|
|
|
|
if self.concat_after:
|
|
|
|
x_concat = paddle.concat((x, x_att), axis=-1)
|
|
|
|
x_concat = paddle.concat((x, x_att), axis=-1)
|
|
|
@ -211,7 +209,8 @@ class ConformerEncoderLayer(nn.Layer):
|
|
|
|
att_cache (paddle.Tensor): Cache tensor of the KEY & VALUE
|
|
|
|
att_cache (paddle.Tensor): Cache tensor of the KEY & VALUE
|
|
|
|
(#batch=1, head, cache_t1, d_k * 2), head * d_k == size.
|
|
|
|
(#batch=1, head, cache_t1, d_k * 2), head * d_k == size.
|
|
|
|
cnn_cache (paddle.Tensor): Convolution cache in conformer layer
|
|
|
|
cnn_cache (paddle.Tensor): Convolution cache in conformer layer
|
|
|
|
(#batch=1, size, cache_t2)
|
|
|
|
(1, #batch=1, size, cache_t2). First dim will not be used, just
|
|
|
|
|
|
|
|
for dy2st.
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
paddle.Tensor: Output tensor (#batch, time, size).
|
|
|
|
paddle.Tensor: Output tensor (#batch, time, size).
|
|
|
|
paddle.Tensor: Mask tensor (#batch, time, time).
|
|
|
|
paddle.Tensor: Mask tensor (#batch, time, time).
|
|
|
@ -219,6 +218,8 @@ class ConformerEncoderLayer(nn.Layer):
|
|
|
|
(#batch=1, head, cache_t1 + time, d_k * 2).
|
|
|
|
(#batch=1, head, cache_t1 + time, d_k * 2).
|
|
|
|
paddle.Tensor: cnn_cache tensor (#batch, size, cache_t2).
|
|
|
|
paddle.Tensor: cnn_cache tensor (#batch, size, cache_t2).
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
# (1, #batch=1, size, cache_t2) -> (#batch=1, size, cache_t2)
|
|
|
|
|
|
|
|
cnn_cache = paddle.squeeze(cnn_cache, axis=0)
|
|
|
|
|
|
|
|
|
|
|
|
# whether to use macaron style FFN
|
|
|
|
# whether to use macaron style FFN
|
|
|
|
if self.feed_forward_macaron is not None:
|
|
|
|
if self.feed_forward_macaron is not None:
|
|
|
@ -250,7 +251,6 @@ class ConformerEncoderLayer(nn.Layer):
|
|
|
|
# convolution module
|
|
|
|
# convolution module
|
|
|
|
# Fake new cnn cache here, and then change it in conv_module
|
|
|
|
# Fake new cnn cache here, and then change it in conv_module
|
|
|
|
new_cnn_cache = paddle.zeros([0, 0, 0], dtype=x.dtype)
|
|
|
|
new_cnn_cache = paddle.zeros([0, 0, 0], dtype=x.dtype)
|
|
|
|
cnn_cache = paddle.squeeze(cnn_cache, axis=0)
|
|
|
|
|
|
|
|
if self.conv_module is not None:
|
|
|
|
if self.conv_module is not None:
|
|
|
|
residual = x
|
|
|
|
residual = x
|
|
|
|
if self.normalize_before:
|
|
|
|
if self.normalize_before:
|
|
|
|