add glu and conformer conv

5 years ago · 59ed89014b
parent b2bc6eb526
commit 59ed89014b
2 changed files with 13 additions and 6 deletions
--- a/deepspeech/modules/activation.py
+++ b/deepspeech/modules/activation.py
@@ -25,7 +25,9 @@ from paddle.nn import initializer as I
 logger = logging.getLogger(__name__)
-__all__ = ['brelu', "glu"]
+__all__ = [
+    "brelu", "glu", "GLU", "LinearGLUBlock", "ConstantPad2d", "ConvGLUBlock"
+]
 def brelu(x, t_min=0.0, t_max=24.0, name=None):
@@ -73,6 +75,8 @@ def glu(x, dim=-1):
 # TODO(Hui Zhang): remove this activation
 if not hasattr(nn.functional, 'glu'):
    logger.warn(
        "register user glu to paddle.nn.functional, remove this when fixed!")
    setattr(nn.functional, 'glu', glu)
--- a/deepspeech/modules/conformer_convolution.py
+++ b/deepspeech/modules/conformer_convolution.py
@@ -58,7 +58,8 @@ class ConvolutionModule(nn.Layer):
            kernel_size=1,
            stride=1,
            padding=0,
-            bias=None if bias else False,  # None for True as default
+            bias_attr=None
+            if bias else False,  # None for True, using bias as default config
        )
        # self.lorder is used to distinguish if it's a causal convolution,
@@ -82,7 +83,8 @@ class ConvolutionModule(nn.Layer):
            stride=1,
            padding=padding,
            groups=channels,
-            bias=None if bias else False,  # None for True as default
+            bias_attr=None
+            if bias else False,  # None for True, using bias as default config
        )
        assert norm in ['batch_norm', 'layer_norm']
@@ -99,7 +101,8 @@ class ConvolutionModule(nn.Layer):
            kernel_size=1,
            stride=1,
            padding=0,
-            bias=None if bias else False,  # None for True as default
+            bias_attr=None
+            if bias else False,  # None for True, using bias as default config
        )
        self.activation = activation
@@ -109,10 +112,10 @@ class ConvolutionModule(nn.Layer):
        Args:
            x (paddle.Tensor): Input tensor (#batch, time, channels).
            cache (paddle.Tensor): left context cache, it is only
-                used in causal convolution. (#batch, channels, time)
+                used in causal convolution. (#batch, channels, time')
        Returns:
            paddle.Tensor: Output tensor (#batch, time, channels).
-            paddle.Tensor: Output cache tensor (#batch, channels, time)
+            paddle.Tensor: Output cache tensor (#batch, channels, time')
        """
        # exchange the temporal dimension and the feature dimension
        x = x.transpose([0, 2, 1])  # [B, C, T]