PaddleSpeech/paddlespeech/t2s/modules/pqmf.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Pseudo QMF modules."""
import numpy as np
import paddle
import paddle.nn.functional as F
from paddle import nn
from scipy.signal.windows import kaiser


def design_prototype_filter(taps=62, cutoff_ratio=0.142, beta=9.0):
    """Build the Kaiser-windowed low-pass prototype filter used by PQMF.

    An ideal low-pass impulse response (a truncated sinc centred on the
    middle tap) is computed first and then tapered with a Kaiser window.
    The approach follows `A Kaiser window approach for the design of
    prototype filters of cosine modulated filterbanks`_.

    Args:
        taps (int):
            Filter order; must be even. The result has ``taps + 1`` samples.
        cutoff_ratio (float):
            Cut-off frequency ratio, strictly between 0.0 and 1.0.
        beta (float):
            Shape parameter passed to the Kaiser window.
    Returns:
        ndarray:
            Impulse response of the prototype filter, shape (taps + 1,).
    Raises:
        AssertionError: If ``taps`` is odd or ``cutoff_ratio`` lies outside
            the open interval (0.0, 1.0).

        .. _`A Kaiser window approach for the design of prototype filters of cosine modulated filterbanks`:
            https://ieeexplore.ieee.org/abstract/document/681427
    """
    # argument validation
    assert taps % 2 == 0, "The number of taps mush be even number."
    assert 0.0 < cutoff_ratio < 1.0, "Cutoff ratio must be > 0.0 and < 1.0."

    n_samples = taps + 1
    center = taps // 2
    omega_c = np.pi * cutoff_ratio

    # sample positions measured from the middle tap: -taps/2 ... +taps/2
    offsets = np.arange(n_samples) - 0.5 * taps

    # ideal low-pass response; the middle tap evaluates 0 / 0 and yields
    # nan, so the "invalid" floating-point warning is silenced here
    with np.errstate(invalid="ignore"):
        ideal = np.sin(omega_c * offsets) / (np.pi * offsets)
    # replace the nan with the analytic limit: omega_c / pi == cutoff_ratio
    ideal[center] = cutoff_ratio

    # taper the ideal response with the Kaiser window
    window = kaiser(n_samples, beta)
    return ideal * window


class PQMF(nn.Layer):
    """Pseudo-QMF analysis / synthesis filter bank.

    The per-band filters are cosine-modulated copies of one Kaiser-windowed
    prototype filter. This module is based on
    `Near-perfect-reconstruction pseudo-QMF banks`_.

    .. _`Near-perfect-reconstruction pseudo-QMF banks`:
        https://ieeexplore.ieee.org/document/258122
    """

    def __init__(self, subbands=4, taps=62, cutoff_ratio=0.142, beta=9.0):
        """Initialize the PQMF module.

        The default cutoff_ratio and beta are optimized for #subbands = 4.
        See the discussion in https://github.com/kan-bayashi/ParallelWaveGAN/issues/195.

        Args:
            subbands (int):
                The number of subbands.
            taps (int):
                The number of filter taps.
            cutoff_ratio (float):
                Cut-off frequency ratio.
            beta (float):
                Beta coefficient for kaiser window.
        """
        super().__init__()

        prototype = design_prototype_filter(taps, cutoff_ratio, beta)
        n_coeffs = len(prototype)
        h_analysis = np.zeros((subbands, n_coeffs))
        h_synthesis = np.zeros((subbands, n_coeffs))

        # tap positions measured from the middle of the filter
        centered = np.arange(taps + 1) - (taps / 2)
        for band in range(subbands):
            # cosine carrier for this band
            carrier = (2 * band + 1) * (np.pi / (2 * subbands)) * centered
            # phase offset of +/- pi/4, alternating sign from band to band
            phase = (-1)**band * np.pi / 4
            h_analysis[band] = 2 * prototype * np.cos(carrier + phase)
            h_synthesis[band] = 2 * prototype * np.cos(carrier - phase)

        # convert to float32 tensors laid out as conv1d weights:
        #   analysis_filter  -> (subbands, 1, taps + 1)
        #   synthesis_filter -> (1, subbands, taps + 1)
        analysis_weight = paddle.to_tensor(h_analysis, dtype="float32")
        self.analysis_filter = analysis_weight.unsqueeze(1)
        synthesis_weight = paddle.to_tensor(h_synthesis, dtype="float32")
        self.synthesis_filter = synthesis_weight.unsqueeze(0)

        # filter for downsampling & upsampling: a (subbands, subbands,
        # subbands) kernel that is zero except for a 1.0 at [k, k, 0]
        resample_kernel = paddle.zeros(
            (subbands, subbands, subbands), dtype="float32")
        for band in range(subbands):
            resample_kernel[band, band, 0] = 1.0
        self.updown_filter = resample_kernel
        self.subbands = subbands

        # zero padding of taps // 2 samples applied before each filter conv
        self.pad_fn = nn.Pad1D(taps // 2, mode='constant', value=0.0)

    def analysis(self, x):
        """Split a signal into subband signals.

        Args:
            x (Tensor):
                Input tensor (B, 1, T).
        Returns:
            Tensor: Output tensor (B, subbands, T // subbands).
        """
        # filter with the analysis bank, then keep every `subbands`-th sample
        padded = self.pad_fn(x)
        banded = F.conv1d(padded, self.analysis_filter)
        return F.conv1d(banded, self.updown_filter, stride=self.subbands)

    def synthesis(self, x):
        """Rebuild a signal from its subband signals.

        Args:
            x (Tensor):
                Input tensor (B, subbands, T // subbands).
        Returns:
            Tensor: Output tensor (B, 1, T).
        """
        # upsample with the resampling kernel scaled by the number of subbands
        upsampled = F.conv1d_transpose(
            x, self.updown_filter * self.subbands, stride=self.subbands)

        # filter with the synthesis bank to merge the bands into one channel
        padded = self.pad_fn(upsampled)
        return F.conv1d(padded, self.synthesis_filter)

    # when converting dygraph to static graph, can not use self.pqmf.synthesis directly
    def forward(self, x):
        """Run :meth:`synthesis` on ``x`` and return its result."""
        return self.synthesis(x)
add csmsc mb melgan example 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
add reference 3 years ago			`# Modified from espnet(https://github.com/espnet/espnet)`
add csmsc mb melgan example 3 years ago			`"""Pseudo QMF modules."""`
			`import numpy as np`
			`import paddle`
			`import paddle.nn.functional as F`
refactor encoder, rm old code 3 years ago			`from paddle import nn`
fix scipy import error (#3874) 1 week ago			`from scipy.signal.windows import kaiser`
add csmsc mb melgan example 3 years ago

			`def design_prototype_filter(taps=62, cutoff_ratio=0.142, beta=9.0):`
			`"""Design prototype filter for PQMF.`
			This method is based on `A Kaiser window approach for the design of prototype
			filters of cosine modulated filterbanks`_.
change the docstring style from numpydoc to google, test=tts 3 years ago
			`Args:`
fromat doc_string 2 years ago			`taps (int):`
			`The number of filter taps.`
			`cutoff_ratio (float):`
			`Cut-off frequency ratio.`
			`beta (float):`
			`Beta coefficient for kaiser window.`
change the docstring style from numpydoc to google, test=tts 3 years ago			`Returns:`
			`ndarray:`
			`Impulse response of prototype filter (taps + 1,).`
			.. _`A Kaiser window approach for the design of prototype filters of cosine modulated filterbanks`:
			`https://ieeexplore.ieee.org/abstract/document/681427`
add csmsc mb melgan example 3 years ago			`"""`
			`# check the arguments are valid`
			`assert taps % 2 == 0, "The number of taps mush be even number."`
			`assert 0.0 < cutoff_ratio < 1.0, "Cutoff ratio must be > 0.0 and < 1.0."`
			`# make initial filter`
			`omega_c = np.pi * cutoff_ratio`
			`with np.errstate(invalid="ignore"):`
fix mbmelgan static 3 years ago			`h_i = np.sin(omega_c * (np.arange(taps + 1) - 0.5 * taps)) / (`
			`np.pi * (np.arange(taps + 1) - 0.5 * taps))`
			`h_i[taps //`
			`2] = np.cos(0) * cutoff_ratio # fix nan due to indeterminate form`

add csmsc mb melgan example 3 years ago			`# apply kaiser window`
			`w = kaiser(taps + 1, beta)`
			`h = h_i * w`
fix mbmelgan static 3 years ago
add csmsc mb melgan example 3 years ago			`return h`


refactor encoder, rm old code 3 years ago			`class PQMF(nn.Layer):`
add csmsc mb melgan example 3 years ago			`"""PQMF module.`
			This module is based on `Near-perfect-reconstruction pseudo-QMF banks`_.
			.. _`Near-perfect-reconstruction pseudo-QMF banks`:
			`https://ieeexplore.ieee.org/document/258122`
			`"""`

			`def __init__(self, subbands=4, taps=62, cutoff_ratio=0.142, beta=9.0):`
			`"""Initialize PQMF module.`
			`The cutoff_ratio and beta parameters are optimized for #subbands = 4.`
			`See discussion in https://github.com/kan-bayashi/ParallelWaveGAN/issues/195.`
change the docstring style from numpydoc to google, test=tts 3 years ago
			`Args:`
fromat doc_string 2 years ago			`subbands (int):`
			`The number of subbands.`
			`taps (int):`
			`The number of filter taps.`
			`cutoff_ratio (float):`
			`Cut-off frequency ratio.`
			`beta (float):`
			`Beta coefficient for kaiser window.`
add csmsc mb melgan example 3 years ago			`"""`
fix mbmelgan static 3 years ago			`super().__init__()`
fix mbmelgan static 3 years ago
add csmsc mb melgan example 3 years ago			`h_proto = design_prototype_filter(taps, cutoff_ratio, beta)`
fix mbmelgan static 3 years ago			`h_analysis = np.zeros((subbands, len(h_proto)))`
			`h_synthesis = np.zeros((subbands, len(h_proto)))`
add csmsc mb melgan example 3 years ago			`for k in range(subbands):`
			`h_analysis[k] = (`
fix mbmelgan static 3 years ago			`2 * h_proto * np.cos((2 * k + 1) * (np.pi / (2 * subbands)) * (`
			`np.arange(taps + 1) - (taps / 2)) + (-1)**k * np.pi / 4))`
add csmsc mb melgan example 3 years ago			`h_synthesis[k] = (`
fix mbmelgan static 3 years ago			`2 * h_proto * np.cos((2 * k + 1) * (np.pi / (2 * subbands)) * (`
			`np.arange(taps + 1) - (taps / 2)) - (-1)**k * np.pi / 4))`

			`# convert to tensor`
			`self.analysis_filter = paddle.to_tensor(`
			`h_analysis, dtype="float32").unsqueeze(1)`
			`self.synthesis_filter = paddle.to_tensor(`
			`h_synthesis, dtype="float32").unsqueeze(0)`
add csmsc mb melgan example 3 years ago
			`# filter for downsampling & upsampling`
			`updown_filter = paddle.zeros(`
			`(subbands, subbands, subbands), dtype="float32")`
			`for k in range(subbands):`
			`updown_filter[k, k, 0] = 1.0`
			`self.updown_filter = updown_filter`
			`self.subbands = subbands`
			`# keep padding info`
refactor encoder, rm old code 3 years ago			`self.pad_fn = nn.Pad1D(taps // 2, mode='constant', value=0.0)`
add csmsc mb melgan example 3 years ago
			`def analysis(self, x):`
			`"""Analysis with PQMF.`
change the docstring style from numpydoc to google, test=tts 3 years ago			`Args:`
fromat doc_string 2 years ago			`x (Tensor):`
			`Input tensor (B, 1, T).`
change the docstring style from numpydoc to google, test=tts 3 years ago			`Returns:`
			`Tensor: Output tensor (B, subbands, T // subbands).`
add csmsc mb melgan example 3 years ago			`"""`
			`x = F.conv1d(self.pad_fn(x), self.analysis_filter)`
			`return F.conv1d(x, self.updown_filter, stride=self.subbands)`

			`def synthesis(self, x):`
			`"""Synthesis with PQMF.`
change the docstring style from numpydoc to google, test=tts 3 years ago			`Args:`
fromat doc_string 2 years ago			`x (Tensor):`
			`Input tensor (B, subbands, T // subbands).`
change the docstring style from numpydoc to google, test=tts 3 years ago			`Returns:`
			`Tensor: Output tensor (B, 1, T).`
add csmsc mb melgan example 3 years ago			`"""`
			`x = F.conv1d_transpose(`
			`x, self.updown_filter * self.subbands, stride=self.subbands)`
fix mbmelgan static 3 years ago
add csmsc mb melgan example 3 years ago			`return F.conv1d(self.pad_fn(x), self.synthesis_filter)`
fix mbmelgan static 3 years ago
			`# when converting dygraph to static graph, can not use self.pqmf.synthesis directly`
			`def forward(self, x):`
			`return self.synthesis(x)`