fix fastspeech2 to static

pull/948/head
TianYuan 3 years ago
parent 6dbcd7720d
commit f652ba3a34
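
Note: the goal of this change set is to keep FastSpeech2 traceable by paddle.jit.to_static so the model can be exported as a static graph for inference. A minimal export sketch, assuming a model instance built and loaded elsewhere (the wrapper class and output path below are illustrative, not part of this commit):

    import paddle

    # Hypothetical wrapper: expose only the phone-id -> mel path for export.
    class FastSpeech2InferenceWrapper(paddle.nn.Layer):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def forward(self, text):
            # assumption: inference() returns the mel spectrogram first
            return self.model.inference(text)[0]

    # model = FastSpeech2(...)  # built and checkpoint-loaded elsewhere
    # static_model = paddle.jit.to_static(
    #     FastSpeech2InferenceWrapper(model),
    #     input_spec=[paddle.static.InputSpec(shape=[None], dtype='int64')])
    # paddle.jit.save(static_model, "exported/fastspeech2")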

@@ -341,6 +341,7 @@ class FastSpeech2(nn.Layer):
         Tensor
             speech_lengths, modified if reduction_factor > 1
         """
         # input of embedding must be int64
         xs = paddle.cast(text, 'int64')
         ilens = paddle.cast(text_lengths, 'int64')
@@ -387,8 +388,8 @@ class FastSpeech2(nn.Layer):
                  spk_id=None,
                  tone_id=None) -> Sequence[paddle.Tensor]:
         # forward encoder
+        bs = xs.shape[0]
         x_masks = self._source_mask(ilens)
         # (B, Tmax, adim)
         hs, _ = self.encoder(xs, x_masks)
@@ -405,7 +406,6 @@ class FastSpeech2(nn.Layer):
         if tone_id is not None:
             tone_embs = self.tone_embedding_table(tone_id)
             hs = self._integrate_with_tone_embed(hs, tone_embs)
         # forward duration predictor and variance predictors
         d_masks = make_pad_mask(ilens)
@@ -452,9 +452,10 @@ class FastSpeech2(nn.Layer):
         else:
             h_masks = None
         # (B, Lmax, adim)
         zs, _ = self.decoder(hs, h_masks)
         # (B, Lmax, odim)
-        before_outs = self.feat_out(zs).reshape((zs.shape[0], -1, self.odim))
+        before_outs = self.feat_out(zs).reshape((bs, -1, self.odim))
         # postnet -> (B, Lmax//r * r, odim)
         if self.postnet is None:
@@ -462,7 +463,6 @@ class FastSpeech2(nn.Layer):
         else:
             after_outs = before_outs + self.postnet(
                 before_outs.transpose((0, 2, 1))).transpose((0, 2, 1))
         return before_outs, after_outs, d_outs, p_outs, e_outs

     def inference(
@@ -517,8 +517,8 @@ class FastSpeech2(nn.Layer):
         d = paddle.cast(durations, 'int64')
         p, e = pitch, energy
         # setup batch axis
-        ilens = paddle.to_tensor(
-            [x.shape[0]], dtype=paddle.int64, place=x.place)
+        ilens = paddle.shape(x)[0]
         xs, ys = x.unsqueeze(0), None
         if y is not None:
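
The recurring pattern in fastspeech2.py is replacing Python-side shape reads (x.shape[0], paddle.to_tensor([x.shape[0]], ...)) with values that stay valid under paddle.jit.to_static, where a dynamic axis shows up as -1 in x.shape but is resolved at run time by paddle.shape. A small sketch of the difference (names are illustrative):

    import paddle

    def batch_size_of(x):
        # In dygraph x.shape[0] is a plain Python int; when traced with a
        # dynamic batch dimension it becomes -1, so arithmetic on it breaks.
        python_int = x.shape[0]
        # paddle.shape(x) is evaluated inside the graph and always yields the
        # real run-time sizes (as an int32 tensor).
        runtime_tensor = paddle.shape(x)[0]
        return python_int, runtime_tensor

    print(batch_size_of(paddle.randn([4, 7, 80])))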

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Length regulator related modules."""
-import numpy as np
 import paddle
 from paddle import nn
@@ -50,10 +49,10 @@ class LengthRegulator(nn.Layer):
             durations: (B, T)
         """
         batch_size, t_enc = durations.shape
-        durations = durations.numpy()
-        slens = np.sum(durations, -1)
-        t_dec = np.max(slens)
-        M = np.zeros([batch_size, t_dec, t_enc])
+        # durations = durations.numpy()
+        slens = paddle.sum(durations, -1)
+        t_dec = paddle.max(slens)
+        M = paddle.zeros([batch_size, t_dec, t_enc])
         for i in range(batch_size):
             k = 0
             for j in range(t_enc):
@@ -82,6 +81,7 @@ class LengthRegulator(nn.Layer):
         Tensor
             replicated input tensor based on durations (B, T*, D).
         """
         if alpha != 1.0:
             assert alpha > 0
             ds = paddle.round(ds.cast(dtype=paddle.float32) * alpha)
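
Dropping numpy from the length regulator keeps the duration-to-frame expansion inside the Paddle graph. A standalone sketch of the same expansion, using only Paddle ops (this mirrors the idea of the loop above, not the exact file contents):

    import paddle

    def expand_by_durations(hs, durations):
        # hs: (B, T_enc, D) encoder outputs; durations: (B, T_enc) int frames
        batch_size, t_enc = durations.shape
        t_dec = int(paddle.max(paddle.sum(durations, -1)))
        # alignment matrix M: M[i, k, j] = 1 if decoder frame k comes from token j
        M = paddle.zeros([batch_size, t_dec, t_enc])
        for i in range(batch_size):
            k = 0
            for j in range(t_enc):
                d = int(durations[i, j])
                M[i, k:k + d, j] = 1
                k += d
        return paddle.matmul(M, hs)

    hs = paddle.randn([1, 3, 8])
    durations = paddle.to_tensor([[2, 1, 3]], dtype='int64')
    print(expand_by_durations(hs, durations).shape)  # [1, 6, 8]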

@@ -37,7 +37,7 @@ class MultiHeadedAttention(nn.Layer):
     def __init__(self, n_head, n_feat, dropout_rate):
         """Construct an MultiHeadedAttention object."""
         super(MultiHeadedAttention, self).__init__()
-        assert n_feat % n_head == 0
+        # assert n_feat % n_head == 0
         # We assume d_v always equals d_k
         self.d_k = n_feat // n_head
         self.h = n_head
@@ -106,13 +106,9 @@ class MultiHeadedAttention(nn.Layer):
         n_batch = value.shape[0]
         softmax = paddle.nn.Softmax(axis=-1)
         if mask is not None:
             mask = mask.unsqueeze(1)
             mask = paddle.logical_not(mask)
-            min_value = float(
-                numpy.finfo(
-                    paddle.to_tensor(0, dtype=scores.dtype).numpy().dtype).min)
+            min_value = float(numpy.finfo("float32").min)
             scores = masked_fill(scores, mask, min_value)
             # (batch, head, time1, time2)
             self.attn = softmax(scores)
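
The attention change avoids deriving the fill value's dtype through paddle.to_tensor(...).numpy(), which pulls data out of the graph; float(numpy.finfo("float32").min) is just a Python constant. A small masked-softmax sketch with the same fill value (shapes and names are illustrative):

    import numpy
    import paddle

    def masked_softmax(scores, mask):
        # scores: (B, H, T1, T2) logits; mask: (B, 1, T2) bool, True = keep
        min_value = float(numpy.finfo("float32").min)
        pad = paddle.logical_not(mask).unsqueeze(1)          # True = mask out
        pad = pad.broadcast_to(scores.shape)
        scores = paddle.where(pad, paddle.full_like(scores, min_value), scores)
        return paddle.nn.functional.softmax(scores, axis=-1)

    scores = paddle.randn([2, 4, 5, 6])
    mask = paddle.ones([2, 1, 6], dtype='bool')
    print(masked_softmax(scores, mask).shape)  # [2, 4, 5, 6]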

@@ -46,13 +46,14 @@ class PositionalEncoding(nn.Layer):
     def extend_pe(self, x):
         """Reset the positional encodings."""
-        pe = paddle.zeros([x.shape[1], self.d_model])
+        pe = paddle.zeros([paddle.shape(x)[1], self.d_model])
         if self.reverse:
             position = paddle.arange(
-                x.shape[1] - 1, -1, -1.0, dtype=paddle.float32).unsqueeze(1)
+                paddle.shape(x)[1] - 1, -1, -1.0,
+                dtype=paddle.float32).unsqueeze(1)
         else:
             position = paddle.arange(
-                0, x.shape[1], dtype=paddle.float32).unsqueeze(1)
+                0, paddle.shape(x)[1], dtype=paddle.float32).unsqueeze(1)
         div_term = paddle.exp(
             paddle.arange(0, self.d_model, 2, dtype=paddle.float32) *
             -(math.log(10000.0) / self.d_model))
@@ -75,7 +76,8 @@ class PositionalEncoding(nn.Layer):
             Encoded tensor (batch, time, `*`).
         """
         self.extend_pe(x)
-        x = x * self.xscale + self.pe[:, :x.shape[1]]
+        x = x * self.xscale + self.pe[:, :paddle.shape(x)[1]]
         return self.dropout(x)
@@ -101,7 +103,7 @@ class ScaledPositionalEncoding(PositionalEncoding):
         x = paddle.ones([1], dtype="float32")
         self.alpha = paddle.create_parameter(
             shape=x.shape,
-            dtype=str(x.numpy().dtype),
+            dtype="float32",
             default_initializer=paddle.nn.initializer.Assign(x))

     def reset_parameters(self):
@@ -115,12 +117,11 @@ class ScaledPositionalEncoding(PositionalEncoding):
         ----------
         x : paddle.Tensor
             Input tensor (batch, time, `*`).

         Returns
         ----------
         paddle.Tensor
             Encoded tensor (batch, time, `*`).
         """
         self.extend_pe(x)
-        x = x + self.alpha * self.pe[:, :x.shape[1]]
+        x = x + self.alpha * self.pe[:, :paddle.shape(x)[1]]
         return self.dropout(x)
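
In embedding.py the same x.shape to paddle.shape swap keeps the time axis usable for slicing the precomputed table when the sequence length is dynamic, and the alpha parameter's dtype becomes a literal "float32" instead of going through .numpy(). A minimal slicing sketch (shapes are assumed):

    import paddle

    pe = paddle.randn([1, 1000, 256])   # (1, max_len, d_model) precomputed table
    x = paddle.randn([2, 37, 256])      # (batch, time, d_model) input
    T = paddle.shape(x)[1]              # run-time time length as a tensor
    y = x + pe[:, :T]                   # same as pe[:, :x.shape[1]] in dygraph
    print(y.shape)                      # [2, 37, 256]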

@@ -185,6 +185,7 @@ class Encoder(nn.Layer):
         paddle.Tensor
             Mask tensor (#batch, time).
         """
         xs = self.embed(xs)
         xs, masks = self.encoders(xs, masks)
         if self.normalize_before:

@@ -87,7 +87,7 @@ class EncoderLayer(nn.Layer):
         if cache is None:
             x_q = x
         else:
-            assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size)
+            # assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size)
             x_q = x[:, -1:, :]
             residual = residual[:, -1:, :]
             mask = None if mask is None else mask[:, -1:, :]

@@ -44,6 +44,7 @@ class LayerNorm(paddle.nn.LayerNorm):
         paddle.Tensor
             Normalized tensor.
         """
         if self.dim == -1:
             return super(LayerNorm, self).forward(x)
         else:
@@ -54,9 +55,10 @@ class LayerNorm(paddle.nn.LayerNorm):
             orig_perm = list(range(len_dim))
             new_perm = orig_perm[:]
-            new_perm[self.dim], new_perm[len_dim -
-                                         1] = new_perm[len_dim -
-                                                       1], new_perm[self.dim]
+            temp = new_perm[self.dim]
+            new_perm[self.dim] = new_perm[len_dim - 1]
+            new_perm[len_dim - 1] = temp
+            # new_perm[self.dim], new_perm[len_dim -1] = new_perm[len_dim -1], new_perm[self.dim]
             return paddle.transpose(
                 super(LayerNorm, self).forward(paddle.transpose(x, new_perm)),
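
The LayerNorm change only unrolls the simultaneous tuple swap into an explicit temporary, presumably to keep the statement simple for the to_static transpiler; the permutation it builds is unchanged. For example, with a 3-D input and dim=1:

    len_dim = 3
    dim = 1
    new_perm = list(range(len_dim))     # [0, 1, 2]
    temp = new_perm[dim]
    new_perm[dim] = new_perm[len_dim - 1]
    new_perm[len_dim - 1] = temp
    print(new_perm)                     # [0, 2, 1]: normalize dim 1 by moving it last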

@@ -25,12 +25,22 @@ def is_broadcastable(shp1, shp2):
     return True


+def broadcast_shape(shp1, shp2):
+    result = []
+    for a, b in zip(shp1[::-1], shp2[::-1]):
+        result.append(max(a, b))
+    return result[::-1]
+
+
 def masked_fill(xs: paddle.Tensor,
                 mask: paddle.Tensor,
                 value: Union[float, int]):
-    assert is_broadcastable(xs.shape, mask.shape) is True
-    bshape = paddle.broadcast_shape(xs.shape, mask.shape)
+    # assert is_broadcastable(xs.shape, mask.shape) is True
+    # bshape = paddle.broadcast_shape(xs.shape, mask.shape)
+    bshape = broadcast_shape(xs.shape, mask.shape)
+    mask.stop_gradient = True
     mask = mask.broadcast_to(bshape)
     trues = paddle.ones_like(xs) * value
     mask = mask.cast(dtype=paddle.bool)
     xs = paddle.where(mask, trues, xs)
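
The new broadcast_shape helper replaces paddle.broadcast_shape with pure Python; note that, as written, it assumes both shapes have the same rank (zip stops at the shorter one), which holds for how masked_fill is called from the attention module above, where the mask has already been unsqueezed to match scores. A quick usage sketch:

    import paddle

    def broadcast_shape(shp1, shp2):
        # same helper as above: element-wise max over aligned trailing dims
        result = []
        for a, b in zip(shp1[::-1], shp2[::-1]):
            result.append(max(a, b))
        return result[::-1]

    scores = paddle.randn([2, 4, 5, 6])
    mask = paddle.zeros([2, 1, 1, 6], dtype='bool')
    print(broadcast_shape(scores.shape, mask.shape))  # [2, 4, 5, 6]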

@@ -56,7 +56,7 @@ def make_pad_mask(lengths, length_dim=-1):
     Parameters
     ----------
-    lengths : LongTensor or List
+    lengths : LongTensor
         Batch of lengths (B,).

     Returns
@@ -77,17 +77,11 @@ def make_pad_mask(lengths, length_dim=-1):
     if length_dim == 0:
         raise ValueError("length_dim cannot be 0: {}".format(length_dim))

-    if not isinstance(lengths, list):
-        lengths = lengths.tolist()
-    bs = int(len(lengths))
-    maxlen = int(max(lengths))
+    bs = paddle.shape(lengths)[0]
+    maxlen = lengths.max()

     seq_range = paddle.arange(0, maxlen, dtype=paddle.int64)
     seq_range_expand = seq_range.unsqueeze(0).expand([bs, maxlen])
-    seq_length_expand = paddle.to_tensor(
-        lengths, dtype=seq_range_expand.dtype).unsqueeze(-1)
+    seq_length_expand = lengths.unsqueeze(-1)
     mask = seq_range_expand >= seq_length_expand
     return mask
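
With the list path removed, make_pad_mask now expects lengths as a tensor and keeps bs and maxlen as run-time values. A short usage sketch of the tensor-only version:

    import paddle

    lengths = paddle.to_tensor([5, 3, 2], dtype='int64')
    bs = paddle.shape(lengths)[0]
    maxlen = lengths.max()
    seq_range = paddle.arange(0, maxlen, dtype=paddle.int64)
    seq_range_expand = seq_range.unsqueeze(0).expand([bs, maxlen])
    mask = seq_range_expand >= lengths.unsqueeze(-1)
    print(mask.astype('int64'))
    # [[0 0 0 0 0]
    #  [0 0 0 1 1]
    #  [0 0 1 1 1]]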
