# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Tuple

import paddle
from paddle import nn
from typeguard import check_argument_types


def pad_list(xs, pad_value):
    """Perform padding for the list of tensors.

    Args:
        xs (List[Tensor]): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
        pad_value (float): Value for padding.

    Returns:
        Tensor: Padded tensor (B, Tmax, `*`).

    Examples:
        >>> x = [paddle.ones([4]), paddle.ones([2]), paddle.ones([1])]
        >>> x
        [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])]
        >>> pad_list(x, 0)
        tensor([[1., 1., 1., 1.],
                [1., 1., 0., 0.],
                [1., 0., 0., 0.]])
    """
    n_batch = len(xs)
    max_len = max(x.shape[0] for x in xs)
    pad = paddle.full([n_batch, max_len, *xs[0].shape[1:]], pad_value)

    for i in range(n_batch):
        pad[i, :xs[i].shape[0]] = xs[i]

    return pad

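
# Illustrative sketch (not part of the original module): pad_list also handles
# multi-dimensional inputs as long as the trailing (non-time) dimensions match.
# The shapes below are made up for demonstration.
#
#   >>> feats = [paddle.randn([5, 80]), paddle.randn([3, 80])]
#   >>> batch = pad_list(feats, 0.0)   # expected shape: (2, 5, 80)
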

def make_pad_mask(lengths, length_dim=-1):
    """Make mask tensor containing indices of padded part.

    Args:
        lengths (Tensor(int64)): Batch of lengths (B,).

    Returns:
        Tensor(bool): Mask tensor containing indices of padded part.

    Examples:
        With only lengths.

        >>> lengths = [5, 3, 2]
        >>> make_pad_mask(lengths)
        masks = [[0, 0, 0, 0, 0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 1]]
    """
    if length_dim == 0:
        raise ValueError("length_dim cannot be 0: {}".format(length_dim))

    bs = paddle.shape(lengths)[0]
    maxlen = lengths.max()
    seq_range = paddle.arange(0, maxlen, dtype=paddle.int64)
    seq_range_expand = seq_range.unsqueeze(0).expand([bs, maxlen])
    seq_length_expand = lengths.unsqueeze(-1)
    mask = seq_range_expand >= seq_length_expand

    return mask

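
# Illustrative sketch (assumption, not original code): a pad mask is typically
# used to suppress padded time steps, e.g. before a softmax over time. The
# tensor names below are hypothetical.
#
#   >>> lengths = paddle.to_tensor([5, 3, 2], dtype="int64")
#   >>> pad_mask = make_pad_mask(lengths)               # (3, 5), True at padding
#   >>> scores = paddle.randn([3, 5])
#   >>> scores = paddle.where(pad_mask,
#   ...                       paddle.full_like(scores, -1e9),
#   ...                       scores)                    # padded steps get -1e9
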

def make_non_pad_mask(lengths, length_dim=-1):
    """Make mask tensor containing indices of non-padded part.

    Args:
        lengths (Tensor(int64) or List): Batch of lengths (B,).
        length_dim (int, optional): Dimension indicator of the lengths;
            cannot be 0.

    Returns:
        Tensor(bool): Mask tensor containing indices of non-padded part.

    Examples:
        With only lengths.

        >>> lengths = [5, 3, 2]
        >>> make_non_pad_mask(lengths)
        masks = [[1, 1, 1, 1, 1],
                 [1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0]]
    """
    return paddle.logical_not(make_pad_mask(lengths, length_dim))

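
# Illustrative sketch (assumption, not original code): pad_list and
# make_non_pad_mask are commonly combined to batch variable-length features and
# then ignore the padded frames, e.g. for a length-aware mean over time. All
# variable names below are hypothetical.
#
#   >>> feats = [paddle.randn([4, 8]), paddle.randn([2, 8])]
#   >>> lengths = paddle.to_tensor([4, 2], dtype="int64")
#   >>> batch = pad_list(feats, 0.0)                        # (2, 4, 8)
#   >>> mask = make_non_pad_mask(lengths).unsqueeze(-1)     # (2, 4, 1)
#   >>> mask = paddle.cast(mask, batch.dtype)
#   >>> mean = (batch * mask).sum(1) / mask.sum(1)          # (2, 8)
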

def initialize(model: nn.Layer, init: str):
    """Initialize weights of a neural network module.

    Parameters are initialized using the given method or distribution.

    Custom initialization routines can be implemented in submodules.

    Args:
        model (nn.Layer): Target.
        init (str): Method of initialization.
    """
    assert check_argument_types()

    if init == "xavier_uniform":
        nn.initializer.set_global_initializer(nn.initializer.XavierUniform(),
                                              nn.initializer.Constant())
    elif init == "xavier_normal":
        nn.initializer.set_global_initializer(nn.initializer.XavierNormal(),
                                              nn.initializer.Constant())
    elif init == "kaiming_uniform":
        nn.initializer.set_global_initializer(nn.initializer.KaimingUniform(),
                                              nn.initializer.Constant())
    elif init == "kaiming_normal":
        nn.initializer.set_global_initializer(nn.initializer.KaimingNormal(),
                                              nn.initializer.Constant())
    else:
        raise ValueError("Unknown initialization: " + init)

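
# Illustrative sketch (assumption, not original usage): initialize() only sets
# paddle's global default initializer, so it is typically called before the
# layers that should pick it up create their parameters. The layer below is
# hypothetical.
#
#   >>> initialize(model, "xavier_uniform")
#   >>> proj = nn.Linear(256, 80)   # weight: XavierUniform, bias: Constant (0)
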

# for VITS
def get_random_segments(
        x: paddle.Tensor,
        x_lengths: paddle.Tensor,
        segment_size: int, ) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Get random segments.

    Args:
        x (Tensor): Input tensor (B, C, T).
        x_lengths (Tensor): Length tensor (B,).
        segment_size (int): Segment size.

    Returns:
        Tensor: Segmented tensor (B, C, segment_size).
        Tensor: Start index tensor (B,).
    """
    b, c, t = paddle.shape(x)
    max_start_idx = x_lengths - segment_size
    start_idxs = paddle.cast(paddle.rand([b]) * max_start_idx, 'int64')
    segments = get_segments(x, start_idxs, segment_size)

    return segments, start_idxs

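
# Illustrative sketch (assumption, not original code): in VITS-style training a
# random window of latent frames is drawn per utterance so that only a short
# segment passes through the expensive decoder. The shapes and segment size
# below are made up.
#
#   >>> z = paddle.randn([4, 192, 100])                 # (B, C, T) latents
#   >>> z_lengths = paddle.to_tensor([100, 80, 60, 90])
#   >>> z_seg, start_idxs = get_random_segments(z, z_lengths, segment_size=32)
#   >>> z_seg.shape                                     # [4, 192, 32]
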

def get_segments(
        x: paddle.Tensor,
        start_idxs: paddle.Tensor,
        segment_size: int, ) -> paddle.Tensor:
    """Get segments.

    Args:
        x (Tensor): Input tensor (B, C, T).
        start_idxs (Tensor): Start index tensor (B,).
        segment_size (int): Segment size.

    Returns:
        Tensor: Segmented tensor (B, C, segment_size).
    """
    b, c, t = paddle.shape(x)
    segments = paddle.zeros([b, c, segment_size], dtype=x.dtype)
    for i, start_idx in enumerate(start_idxs):
        segments[i] = x[i, :, start_idx:start_idx + segment_size]
    return segments

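
# Illustrative sketch (assumption, not original code): get_segments is the
# deterministic counterpart of get_random_segments; the same start indices can
# be reused to cut the matching window from an aligned tensor. Variable names
# are hypothetical.
#
#   >>> mel_seg = get_segments(mel, start_idxs, segment_size=32)
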

# see https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/pytorch_project_convertor/API_docs/ops/torch.gather.md
def paddle_gather(x, dim, index):
    """Gather values of `x` along axis `dim` at positions `index`,
    mimicking `torch.gather` via `paddle.gather_nd`."""
    index_shape = index.shape
    index_flatten = index.flatten()
    if dim < 0:
        dim = len(x.shape) + dim
    nd_index = []
    for k in range(len(x.shape)):
        if k == dim:
            # along the gathered axis, use the user-provided indices
            nd_index.append(index_flatten)
        else:
            # along other axes, keep each element's own coordinate
            reshape_shape = [1] * len(x.shape)
            reshape_shape[k] = x.shape[k]
            x_arange = paddle.arange(x.shape[k], dtype=index.dtype)
            x_arange = x_arange.reshape(reshape_shape)
            dim_index = paddle.expand(x_arange, index_shape).flatten()
            nd_index.append(dim_index)
    ind2 = paddle.transpose(paddle.stack(nd_index), [1, 0]).astype("int64")
    paddle_out = paddle.gather_nd(x, ind2).reshape(index_shape)
    return paddle_out

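
# Illustrative sketch (assumption, not original code): paddle_gather follows the
# torch.gather convention, i.e. out[i][j] = x[i][index[i][j]] for dim=1.
#
#   >>> x = paddle.to_tensor([[1., 2.], [3., 4.]])
#   >>> idx = paddle.to_tensor([[0, 0], [1, 0]])
#   >>> paddle_gather(x, 1, idx)
#   Tensor([[1., 1.],
#           [4., 3.]])
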