diff --git a/deepspeech/modules/activation.py b/deepspeech/modules/activation.py
index 72ccb5346..ecaca5bca 100644
--- a/deepspeech/modules/activation.py
+++ b/deepspeech/modules/activation.py
@@ -25,7 +25,9 @@ from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
-__all__ = ['brelu', "glu"]
+__all__ = [
+    "brelu", "glu", "GLU", "LinearGLUBlock", "ConstantPad2d", "ConvGLUBlock"
+]
 
 
 def brelu(x, t_min=0.0, t_max=24.0, name=None):
@@ -73,6 +75,8 @@ def glu(x, dim=-1):
 
 # TODO(Hui Zhang): remove this activation
 if not hasattr(nn.functional, 'glu'):
+    logger.warn(
+        "register user glu to paddle.nn.functional, remove this when fixed!")
     setattr(nn.functional, 'glu', glu)
 
 
diff --git a/deepspeech/modules/conformer_convolution.py b/deepspeech/modules/conformer_convolution.py
index 5416bd898..4c3eb9f4f 100644
--- a/deepspeech/modules/conformer_convolution.py
+++ b/deepspeech/modules/conformer_convolution.py
@@ -58,7 +58,8 @@ class ConvolutionModule(nn.Layer):
             kernel_size=1,
             stride=1,
             padding=0,
-            bias=None if bias else False,  # None for True as default
+            bias_attr=None
+            if bias else False,  # None for True, using bias as default config
         )
 
         # self.lorder is used to distinguish if it's a causal convolution,
@@ -82,7 +83,8 @@ class ConvolutionModule(nn.Layer):
             stride=1,
             padding=padding,
             groups=channels,
-            bias=None if bias else False,  # None for True as default
+            bias_attr=None
+            if bias else False,  # None for True, using bias as default config
         )
 
         assert norm in ['batch_norm', 'layer_norm']
@@ -99,7 +101,8 @@ class ConvolutionModule(nn.Layer):
             kernel_size=1,
             stride=1,
             padding=0,
-            bias=None if bias else False,  # None for True as default
+            bias_attr=None
+            if bias else False,  # None for True, using bias as default config
         )
         self.activation = activation
 
@@ -109,10 +112,10 @@ class ConvolutionModule(nn.Layer):
         Args:
            x (paddle.Tensor): Input tensor (#batch, time, channels).
            cache (paddle.Tensor): left context cache, it is only
-                used in causal convolution. (#batch, channels, time)
+                used in causal convolution. (#batch, channels, time')
         Returns:
            paddle.Tensor: Output tensor (#batch, time, channels).
-            paddle.Tensor: Output cache tensor (#batch, channels, time)
+            paddle.Tensor: Output cache tensor (#batch, channels, time')
         """
         # exchange the temporal dimension and the feature dimension
         x = x.transpose([0, 2, 1])  # [B, C, T]