|
|
@ -195,7 +195,6 @@ class AudioEncoder(nn.Layer):
|
|
|
|
x : paddle.Tensor, shape = (batch_size, n_mels, n_ctx)
|
|
|
|
x : paddle.Tensor, shape = (batch_size, n_mels, n_ctx)
|
|
|
|
the mel spectrogram of the audio
|
|
|
|
the mel spectrogram of the audio
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
print (x.shape)
|
|
|
|
|
|
|
|
x = F.gelu(self.conv1(x))
|
|
|
|
x = F.gelu(self.conv1(x))
|
|
|
|
x = F.gelu(self.conv2(x))
|
|
|
|
x = F.gelu(self.conv2(x))
|
|
|
|
x = paddle.transpose(x, (0, 2, 1))
|
|
|
|
x = paddle.transpose(x, (0, 2, 1))
|
|
|
|