fix elementwise_floordiv's fill_constant

3 years ago · 0b3ca1fc08
parent 0a2e367ff4
commit 0b3ca1fc08
4 changed files with 6 additions and 9 deletions
--- a/paddlespeech/t2s/modules/conformer/encoder_layer.py
+++ b/paddlespeech/t2s/modules/conformer/encoder_layer.py
@@ -113,7 +113,6 @@ class EncoderLayer(nn.Layer):
            x, pos_emb = x_input[0], x_input[1]
        else:
            x, pos_emb = x_input, None
-
        skip_layer = False
        # with stochastic depth, residual connection `x + f(x)` becomes
        # `x <- x + 1 / (1 - p) * f(x)` at training time.
@@ -121,14 +120,12 @@ class EncoderLayer(nn.Layer):
        if self.training and self.stochastic_depth_rate > 0:
            skip_layer = paddle.rand(1).item() < self.stochastic_depth_rate
            stoch_layer_coeff = 1.0 / (1 - self.stochastic_depth_rate)
-
        if skip_layer:
            if cache is not None:
                x = paddle.concat([cache, x], axis=1)
            if pos_emb is not None:
                return (x, pos_emb), mask
            return x, mask
-
        # whether to use macaron style
        if self.feed_forward_macaron is not None:
            residual = x
@@ -138,7 +135,6 @@ class EncoderLayer(nn.Layer):
                self.feed_forward_macaron(x))
            if not self.normalize_before:
                x = self.norm_ff_macaron(x)
-
        # multi-headed self-attention module
        residual = x
        if self.normalize_before:
--- a/paddlespeech/t2s/modules/transformer/attention.py
+++ b/paddlespeech/t2s/modules/transformer/attention.py
@@ -192,7 +192,8 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention):
        x_padded = paddle.concat([zero_pad, x], axis=-1)
        x_padded = x_padded.reshape([b, h, t2 + 1, t1])
        # only keep the positions from 0 to time2
-        x = x_padded[:, :, 1:].reshape([b, h, t1, t2])[:, :, :, :t2 // 2 + 1]
+        new_t = paddle.cast(paddle.floor(t2 / 2) + 1, dtype='int32')
+        x = x_padded[:, :, 1:].reshape([b, h, t1, t2])[:, :, :, :new_t]

        if self.zero_triu:
            ones = paddle.ones((t1, t2))
@@ -221,7 +222,6 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention):
        q, k, v = self.forward_qkv(query, key, value)
        # (batch, time1, head, d_k)
        q = q.transpose([0, 2, 1, 3])
-
        n_batch_pos = paddle.shape(pos_emb)[0]
        p = self.linear_pos(pos_emb).reshape(
            [n_batch_pos, -1, self.h, self.d_k])
--- a/paddlespeech/t2s/modules/transformer/embedding.py
+++ b/paddlespeech/t2s/modules/transformer/embedding.py
@@ -198,7 +198,8 @@ class RelPositionalEncoding(nn.Layer):
        x = x * self.xscale
        T = paddle.shape(x)[1]
        pe_size = paddle.shape(self.pe)
-        pos_emb = self.pe[:, pe_size[1] // 2 - T + 1:pe_size[1] // 2 + T, ]
+        tmp = paddle.cast(paddle.floor(pe_size[1] / 2), dtype='int32')
+        pos_emb = self.pe[:, tmp - T + 1:tmp + T, ]
        return self.dropout(x), self.dropout(pos_emb)


--- a/paddlespeech/t2s/modules/transformer/multi_layer_conv.py
+++ b/paddlespeech/t2s/modules/transformer/multi_layer_conv.py
@@ -69,8 +69,8 @@ class MultiLayeredConv1d(nn.Layer):
            Tensor: Batch of output tensors (B, T, in_chans).
        """
        x = self.relu(self.w_1(x.transpose([0, 2, 1]))).transpose([0, 2, 1])
-        return self.w_2(self.dropout(x).transpose([0, 2, 1])).transpose(
-            [0, 2, 1])
+        out = self.w_2(self.dropout(x).transpose([0, 2, 1])).transpose([0, 2, 1])
+        return out


 class Conv1dLinear(nn.Layer):