diff --git a/paddlespeech/s2t/modules/positionwise_feed_forward.py b/paddlespeech/s2t/modules/positionwise_feed_forward.py index b5395f049..9ebd5d638 100644 --- a/paddlespeech/s2t/modules/positionwise_feed_forward.py +++ b/paddlespeech/s2t/modules/positionwise_feed_forward.py @@ -55,12 +55,13 @@ class PositionwiseFeedForward(nn.Layer): self.dropout = nn.Dropout(dropout_rate) self.w_2 = Linear(hidden_units, idim) self.adaptive_scale = adaptive_scale - ada_scale = self.create_parameter( - [1, 1, idim], default_initializer=I.XavierUniform()) - self.add_parameter('ada_scale', ada_scale) - ada_bias = self.create_parameter( - [1, 1, idim], default_initializer=I.XavierUniform()) - self.add_parameter('ada_bias', ada_bias) + if self.adaptive_scale: + ada_scale = self.create_parameter( + [1, 1, idim], default_initializer=I.XavierUniform()) + self.add_parameter('ada_scale', ada_scale) + ada_bias = self.create_parameter( + [1, 1, idim], default_initializer=I.XavierUniform()) + self.add_parameter('ada_bias', ada_bias) if init_weights: self.init_weights() @@ -84,4 +85,6 @@ class PositionwiseFeedForward(nn.Layer): Returns: output tensor, (B, Lmax, D) """ + if self.adaptive_scale: + xs = self.ada_scale * xs + self.ada_bias return self.w_2(self.dropout(self.activation(self.w_1(xs))))