PaddleSpeech/paddlespeech/t2s/modules/nets_utils.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Tuple

import paddle
from paddle import nn
from typeguard import check_argument_types


def pad_list(xs, pad_value):
    """Stack variable-length tensors into one batch, padding each tail.

    Args:
        xs (List[Tensor]): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
        pad_value (float): Value written into every padded position.

    Returns:
        Tensor: Padded tensor (B, Tmax, `*`), where Tmax is the largest
            first-dimension size among the tensors in `xs`.

    Examples:
        >>> x = [paddle.ones([4]), paddle.ones([2]), paddle.ones([1])]
        >>> x
        [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])]
        >>> pad_list(x, 0)
        tensor([[1., 1., 1., 1.],
                [1., 1., 0., 0.],
                [1., 0., 0., 0.]])
    """
    batch_size = len(xs)
    seq_lens = [seq.shape[0] for seq in xs]
    longest = max(seq_lens)
    # The trailing dimensions are taken from the first tensor only.
    trailing_dims = xs[0].shape[1:]

    # Start from a tensor filled with pad_value (no explicit dtype is passed
    # to paddle.full), then overwrite the valid prefix of every row.
    padded = paddle.full([batch_size, longest, *trailing_dims], pad_value)
    for row, (seq, seq_len) in enumerate(zip(xs, seq_lens)):
        padded[row, :seq_len] = seq

    return padded


def make_pad_mask(lengths, length_dim=-1):
    """Make mask tensor containing indices of padded part.

    Args:
        lengths (Tensor(int64) or List[int]): Batch of lengths (B,).
            A Python list or tuple is converted to an int64 Tensor first.
        length_dim (int, optional): Only validated (it must not be 0); it does
            not otherwise influence the returned mask.

    Returns:
        Tensor(bool): Mask tensor (B, Tmax) that is True at padded positions,
            where Tmax is the maximum value in `lengths`.

    Raises:
        ValueError: If `length_dim` is 0.

    Examples:
        With only lengths.

        >>> lengths = [5, 3, 2]
        >>> make_pad_mask(lengths)
        masks = [[0, 0, 0, 0 ,0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 1]]
    """
    if length_dim == 0:
        raise ValueError("length_dim cannot be 0: {}".format(length_dim))

    # The documented example passes a plain list; the tensor ops below
    # (paddle.shape / .max() / .unsqueeze) need a Tensor.
    if isinstance(lengths, (list, tuple)):
        lengths = paddle.to_tensor(lengths, dtype='int64')

    bs = paddle.shape(lengths)[0]
    maxlen = lengths.max()
    # Row vector [0, 1, ..., maxlen - 1] repeated for every batch entry.
    seq_range = paddle.arange(0, maxlen, dtype=paddle.int64)
    seq_range_expand = seq_range.unsqueeze(0).expand([bs, maxlen])
    # Column vector of lengths; broadcasting compares each position with the
    # length of its own sequence.
    seq_length_expand = lengths.unsqueeze(-1)
    mask = seq_range_expand >= seq_length_expand

    return mask


def make_non_pad_mask(lengths, length_dim=-1):
    """Make mask tensor containing indices of non-padded part.

    This is the element-wise logical negation of `make_pad_mask`; both
    arguments are forwarded to it unchanged.

    Args:
        lengths (Tensor(int64)): Batch of lengths (B,).
        length_dim (int, optional): Forwarded to `make_pad_mask`.

    Returns:
        Tensor(bool): Mask tensor that is True at non-padded positions.

    Examples:
        With only lengths.

        >>> lengths = paddle.to_tensor([5, 3, 2])
        >>> make_non_pad_mask(lengths)
        masks = [[1, 1, 1, 1 ,1],
                 [1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0]]
    """
    return paddle.logical_not(make_pad_mask(lengths, length_dim))


def initialize(model: nn.Layer, init: str):
    """Select the global parameter initializer by name.

    The weight initializer named by `init` is paired with a `Constant`
    bias initializer and installed through
    `nn.initializer.set_global_initializer`. `model` is not referenced in
    the body beyond the argument type check.

    Args:
        model (nn.Layer): Target.
        init (str): Method of initialization. One of "xavier_uniform",
            "xavier_normal", "kaiming_uniform" or "kaiming_normal".

    Raises:
        ValueError: If `init` is not one of the names listed above.
    """
    assert check_argument_types()

    # Method name -> attribute of nn.initializer; resolved lazily so only the
    # requested initializer class is looked up and instantiated.
    weight_init_names = {
        "xavier_uniform": "XavierUniform",
        "xavier_normal": "XavierNormal",
        "kaiming_uniform": "KaimingUniform",
        "kaiming_normal": "KaimingNormal",
    }

    class_name = weight_init_names.get(init)
    if class_name is None:
        raise ValueError("Unknown initialization: " + init)

    weight_init_cls = getattr(nn.initializer, class_name)
    nn.initializer.set_global_initializer(weight_init_cls(),
                                          nn.initializer.Constant())


# for VITS
def get_random_segments(
        x: paddle.Tensor,
        x_lengths: paddle.Tensor,
        segment_size: int, ) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Get random segments.

    A start index is drawn independently for every batch entry and the
    corresponding window is cut out with `get_segments`.

    Args:
        x (Tensor): Input tensor (B, C, T).
        x_lengths (Tensor): Length tensor (B,).
        segment_size (int): Segment size.
    Returns:
        Tensor: Segmented tensor (B, C, segment_size).
        Tensor: Start index tensor (B,).
    """
    b, _, _ = paddle.shape(x)
    # Largest admissible start so that a full segment still fits.
    # NOTE(review): this is negative when a length is smaller than
    # segment_size -- confirm callers guarantee x_lengths >= segment_size.
    max_start_idx = x_lengths - segment_size
    # Scale a uniform sample in [0, 1) by the per-item bound, then cast to int64.
    start_idxs = paddle.cast(paddle.rand([b]) * max_start_idx, 'int64')
    segments = get_segments(x, start_idxs, segment_size)

    return segments, start_idxs


def get_segments(
        x: paddle.Tensor,
        start_idxs: paddle.Tensor,
        segment_size: int, ) -> paddle.Tensor:
    """Cut one fixed-size window out of every batch entry.

    Args:
        x (Tensor): Input tensor (B, C, T).
        start_idxs (Tensor): Start index tensor (B,).
        segment_size (int): Segment size.
    Returns:
        Tensor: Segmented tensor (B, C, segment_size).
    """
    batch, channels, _ = paddle.shape(x)
    # Pre-allocate the output and fill it row by row.
    out = paddle.zeros([batch, channels, segment_size], dtype=x.dtype)
    for row, begin in enumerate(start_idxs):
        end = begin + segment_size
        out[row] = x[row, :, begin:end]
    return out


# see https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/pytorch_project_convertor/API_docs/ops/torch.gather.md
def paddle_gather(x, dim, index):
    """Gather values of `x` along `dim` using `paddle.gather_nd`.

    Written as a stand-in for `torch.gather` (see the X2Paddle note linked
    above this function). A full N-d coordinate is built for every element of
    `index`: along `dim` the coordinate comes from `index`, along every other
    axis it is the element's own position.

    Args:
        x (Tensor): Source tensor.
        dim (int): Axis to gather along; negative values count from the end.
        index (Tensor): Indices into `x` along `dim`.

    Returns:
        Tensor: Gathered values, reshaped to the shape of `index`.
    """
    out_shape = index.shape
    flat_index = index.flatten()
    rank = len(x.shape)
    axis = rank + dim if dim < 0 else dim

    def _axis_coords(k):
        # Along the gather axis the coordinates are the requested indices.
        if k == axis:
            return flat_index
        # Elsewhere: arange over axis k, shaped to broadcast along the others.
        # NOTE(review): the expand below appears to require index.shape[k] to
        # match x.shape[k] (or x.shape[k] == 1) for k != dim -- confirm.
        view_shape = [1] * rank
        view_shape[k] = x.shape[k]
        positions = paddle.arange(x.shape[k], dtype=index.dtype)
        positions = positions.reshape(view_shape)
        return paddle.expand(positions, out_shape).flatten()

    per_axis = [_axis_coords(k) for k in range(rank)]
    # (rank, N) -> (N, rank): one full coordinate per gathered element.
    coords = paddle.transpose(paddle.stack(per_axis), [1, 0]).astype("int64")
    gathered = paddle.gather_nd(x, coords)
    return gathered.reshape(out_shape)
merge parakeet repo into deepspeech 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
add reference 3 years ago			`# Modified from espnet(https://github.com/espnet/espnet)`
add vits network scripts, test=tts 2 years ago			`from typing import Tuple`

merge parakeet repo into deepspeech 3 years ago			`import paddle`
			`from paddle import nn`
			`from typeguard import check_argument_types`


			`def pad_list(xs, pad_value):`
			`"""Perform padding for the list of tensors.`

change the docstring style from numpydoc to google, test=tts 3 years ago			`Args:`
			xs (List[Tensor]): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
			`pad_value (float): Value for padding.`

			`Returns:`
			Tensor: Padded tensor (B, Tmax, `*`).

			`Examples:`
			`>>> x = [paddle.ones([4]), paddle.ones([2]), paddle.ones([1])]`
			`>>> x`
			`[tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])]`
			`>>> pad_list(x, 0)`
			`tensor([[1., 1., 1., 1.],`
			`[1., 1., 0., 0.],`
			`[1., 0., 0., 0.]])`
merge parakeet repo into deepspeech 3 years ago			`"""`
			`n_batch = len(xs)`
			`max_len = max(x.shape[0] for x in xs)`
			`pad = paddle.full([n_batch, max_len, *xs[0].shape[1:]], pad_value)`

			`for i in range(n_batch):`
			`pad[i, :xs[i].shape[0]] = xs[i]`

			`return pad`


			`def make_pad_mask(lengths, length_dim=-1):`
			`"""Make mask tensor containing indices of padded part.`

change the docstring style from numpydoc to google, test=tts 3 years ago			`Args:`
			`lengths (Tensor(int64)): Batch of lengths (B,).`

			`Returns:`
			`Tensor(bool): Mask tensor containing indices of padded part bool.`

			`Examples:`
			`With only lengths.`

			`>>> lengths = [5, 3, 2]`
			`>>> make_non_pad_mask(lengths)`
			`masks = [[0, 0, 0, 0 ,0],`
			`[0, 0, 0, 1, 1],`
			`[0, 0, 1, 1, 1]]`
merge parakeet repo into deepspeech 3 years ago			`"""`
			`if length_dim == 0:`
			`raise ValueError("length_dim cannot be 0: {}".format(length_dim))`

fix fastspeech2 to static 3 years ago			`bs = paddle.shape(lengths)[0]`
			`maxlen = lengths.max()`
merge parakeet repo into deepspeech 3 years ago			`seq_range = paddle.arange(0, maxlen, dtype=paddle.int64)`
			`seq_range_expand = seq_range.unsqueeze(0).expand([bs, maxlen])`
fix fastspeech2 to static 3 years ago			`seq_length_expand = lengths.unsqueeze(-1)`
merge parakeet repo into deepspeech 3 years ago			`mask = seq_range_expand >= seq_length_expand`

			`return mask`


			`def make_non_pad_mask(lengths, length_dim=-1):`
			`"""Make mask tensor containing indices of non-padded part.`

change the docstring style from numpydoc to google, test=tts 3 years ago			`Args:`
			`lengths (Tensor(int64) or List): Batch of lengths (B,).`
			`xs (Tensor, optional): The reference tensor.`
			`If set, masks will be the same shape as this tensor.`
			`length_dim (int, optional): Dimension indicator of the above tensor.`
			`See the example.`

			`Returns:`
			`Tensor(bool): mask tensor containing indices of padded part bool.`

			`Examples:`
			`With only lengths.`

			`>>> lengths = [5, 3, 2]`
			`>>> make_non_pad_mask(lengths)`
			`masks = [[1, 1, 1, 1 ,1],`
			`[1, 1, 1, 0, 0],`
			`[1, 1, 0, 0, 0]]`
merge parakeet repo into deepspeech 3 years ago			`"""`
			`return paddle.logical_not(make_pad_mask(lengths, length_dim))`


			`def initialize(model: nn.Layer, init: str):`
			`"""Initialize weights of a neural network module.`

			`Parameters are initialized using the given method or distribution.`

			`Custom initialization routines can be implemented into submodules`

change the docstring style from numpydoc to google, test=tts 3 years ago			`Args:`
			`model (nn.Layer): Target.`
			`init (str): Method of initialization.`
merge parakeet repo into deepspeech 3 years ago			`"""`
			`assert check_argument_types()`

			`if init == "xavier_uniform":`
			`nn.initializer.set_global_initializer(nn.initializer.XavierUniform(),`
			`nn.initializer.Constant())`
			`elif init == "xavier_normal":`
			`nn.initializer.set_global_initializer(nn.initializer.XavierNormal(),`
			`nn.initializer.Constant())`
			`elif init == "kaiming_uniform":`
			`nn.initializer.set_global_initializer(nn.initializer.KaimingUniform(),`
			`nn.initializer.Constant())`
			`elif init == "kaiming_normal":`
			`nn.initializer.set_global_initializer(nn.initializer.KaimingNormal(),`
			`nn.initializer.Constant())`
			`else:`
			`raise ValueError("Unknown initialization: " + init)`
add vits network scripts, test=tts 2 years ago

			`# for VITS`
			`def get_random_segments(`
			`x: paddle.paddle,`
			`x_lengths: paddle.Tensor,`
			`segment_size: int, ) -> Tuple[paddle.Tensor, paddle.Tensor]:`
			`"""Get random segments.`
			`Args:`
			`x (Tensor): Input tensor (B, C, T).`
			`x_lengths (Tensor): Length tensor (B,).`
			`segment_size (int): Segment size.`
			`Returns:`
			`Tensor: Segmented tensor (B, C, segment_size).`
			`Tensor: Start index tensor (B,).`
			`"""`
			`b, c, t = paddle.shape(x)`
			`max_start_idx = x_lengths - segment_size`
			`start_idxs = paddle.cast(paddle.rand([b]) * max_start_idx, 'int64')`
			`segments = get_segments(x, start_idxs, segment_size)`

			`return segments, start_idxs`


			`def get_segments(`
			`x: paddle.Tensor,`
			`start_idxs: paddle.Tensor,`
			`segment_size: int, ) -> paddle.Tensor:`
			`"""Get segments.`
			`Args:`
			`x (Tensor): Input tensor (B, C, T).`
			`start_idxs (Tensor): Start index tensor (B,).`
			`segment_size (int): Segment size.`
			`Returns:`
			`Tensor: Segmented tensor (B, C, segment_size).`
			`"""`
			`b, c, t = paddle.shape(x)`
			`segments = paddle.zeros([b, c, segment_size], dtype=x.dtype)`
			`for i, start_idx in enumerate(start_idxs):`
			`segments[i] = x[i, :, start_idx:start_idx + segment_size]`
			`return segments`


			`# see https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/pytorch_project_convertor/API_docs/ops/torch.gather.md`
			`def paddle_gather(x, dim, index):`
			`index_shape = index.shape`
			`index_flatten = index.flatten()`
			`if dim < 0:`
			`dim = len(x.shape) + dim`
			`nd_index = []`
			`for k in range(len(x.shape)):`
			`if k == dim:`
			`nd_index.append(index_flatten)`
			`else:`
			`reshape_shape = [1] * len(x.shape)`
			`reshape_shape[k] = x.shape[k]`
			`x_arange = paddle.arange(x.shape[k], dtype=index.dtype)`
			`x_arange = x_arange.reshape(reshape_shape)`
			`dim_index = paddle.expand(x_arange, index_shape).flatten()`
			`nd_index.append(dim_index)`
			`ind2 = paddle.transpose(paddle.stack(nd_index), [1, 0]).astype("int64")`
			`paddle_out = paddle.gather_nd(x, ind2).reshape(index_shape)`
			`return paddle_out`