【Hackathon 8th No.7】Python version adaptation 1 (#3965)

* fix setup.py

* Update generator.py

* fix optional

* Update setup.py
Branch: pull/3974/head
张春乔 authored 8 months ago, committed by GitHub
parent 65a1624c86
commit 25741d7038
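
The change repeated throughout the diff below is the move from typeguard's runtime assertion, assert check_argument_types(), to the @typechecked decorator, the API supported by the typeguard releases published for newer Python versions. A minimal before/after sketch of the pattern; the class and argument names are illustrative, not taken from the repo:

    from typing import Optional

    from typeguard import typechecked


    class ExampleLayer:
        # Before (typeguard 2.x):
        #     def __init__(self, channels: int):
        #         assert check_argument_types()
        #         ...
        # After (typeguard 3+/4+): decorate the callable; every annotated
        # argument is validated when the constructor is called.
        @typechecked
        def __init__(self, channels: int, name: Optional[str]=None):
            self.channels = channels
            self.name = name


    ExampleLayer(channels=4)          # passes
    # ExampleLayer(channels="four")   # rejected by the decorator at call time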

@@ -19,7 +19,7 @@ from typing import Tuple
 import paddle
 from paddle import nn
 from paddle.nn import initializer as I
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.s2t.modules.align import BatchNorm1D
 from paddlespeech.s2t.modules.align import Conv1D
@@ -34,6 +34,7 @@ __all__ = ['ConvolutionModule']
 class ConvolutionModule(nn.Layer):
     """ConvolutionModule in Conformer model."""
+    @typechecked
     def __init__(self,
                  channels: int,
                  kernel_size: int=15,
@@ -52,7 +53,6 @@ class ConvolutionModule(nn.Layer):
             causal (bool): Whether use causal convolution or not
             bias (bool): Whether Conv with bias or not
         """
-        assert check_argument_types()
         super().__init__()
         self.bias = bias
         self.channels = channels

@@ -17,7 +17,7 @@ from typing import Union
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.s2t.modules.align import Linear
 from paddlespeech.s2t.modules.loss import CTCLoss
@@ -48,6 +48,7 @@ __all__ = ['CTCDecoder']
 class CTCDecoderBase(nn.Layer):
+    @typechecked
     def __init__(self,
                  odim,
                  enc_n_units,
@@ -66,7 +67,6 @@ class CTCDecoderBase(nn.Layer):
             batch_average (bool): do batch dim wise average.
             grad_norm_type (str): Default, None. one of 'instance', 'batch', 'frame', None.
         """
-        assert check_argument_types()
         super().__init__()
         self.blank_id = blank_id

@@ -21,7 +21,7 @@ from typing import Tuple
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.s2t.decoders.scorers.scorer_interface import BatchScorerInterface
 from paddlespeech.s2t.modules.align import Embedding
@@ -61,6 +61,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
             False: x -> x + att(x)
     """
+    @typechecked
     def __init__(self,
                  vocab_size: int,
                  encoder_output_size: int,
@@ -77,8 +78,6 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
                  concat_after: bool=False,
                  max_len: int=5000):
-        assert check_argument_types()
         nn.Layer.__init__(self)
         self.selfattention_layer_type = 'selfattn'
         attention_dim = encoder_output_size
@@ -276,6 +275,7 @@ class BiTransformerDecoder(BatchScorerInterface, nn.Layer):
             False: x -> x + att(x)
     """
+    @typechecked
     def __init__(self,
                  vocab_size: int,
                  encoder_output_size: int,
@@ -293,8 +293,6 @@ class BiTransformerDecoder(BatchScorerInterface, nn.Layer):
                  concat_after: bool=False,
                  max_len: int=5000):
-        assert check_argument_types()
         nn.Layer.__init__(self)
         self.left_decoder = TransformerDecoder(
             vocab_size, encoder_output_size, attention_heads, linear_units,

@@ -21,7 +21,7 @@ from typing import Union
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.s2t.modules.activation import get_activation
 from paddlespeech.s2t.modules.align import LayerNorm
@@ -58,6 +58,7 @@ __all__ = [
 class BaseEncoder(nn.Layer):
+    @typechecked
     def __init__(self,
                  input_size: int,
                  output_size: int=256,
@@ -73,7 +74,7 @@ class BaseEncoder(nn.Layer):
                  concat_after: bool=False,
                  static_chunk_size: int=0,
                  use_dynamic_chunk: bool=False,
-                 global_cmvn: paddle.nn.Layer=None,
+                 global_cmvn: Optional[nn.Layer]=None,
                  use_dynamic_left_chunk: bool=False,
                  max_len: int=5000):
         """
@@ -108,7 +109,6 @@ class BaseEncoder(nn.Layer):
             use_dynamic_left_chunk (bool): whether use dynamic left chunk in
                 dynamic chunk training
         """
-        assert check_argument_types()
         super().__init__()
         self._output_size = output_size
@@ -349,6 +349,7 @@ class BaseEncoder(nn.Layer):
 class TransformerEncoder(BaseEncoder):
     """Transformer encoder module."""
+    @typechecked
     def __init__(
             self,
             input_size: int,
@@ -365,12 +366,11 @@ class TransformerEncoder(BaseEncoder):
             concat_after: bool=False,
             static_chunk_size: int=0,
             use_dynamic_chunk: bool=False,
-            global_cmvn: nn.Layer=None,
+            global_cmvn: Optional[nn.Layer]=None,
             use_dynamic_left_chunk: bool=False, ):
         """ Construct TransformerEncoder
         See Encoder for the meaning of each parameter.
         """
-        assert check_argument_types()
         super().__init__(input_size, output_size, attention_heads, linear_units,
                          num_blocks, dropout_rate, positional_dropout_rate,
                          attention_dropout_rate, input_layer,
@@ -424,6 +424,7 @@ class TransformerEncoder(BaseEncoder):
 class ConformerEncoder(BaseEncoder):
     """Conformer encoder module."""
+    @typechecked
     def __init__(self,
                  input_size: int,
                  output_size: int=256,
@@ -439,7 +440,7 @@ class ConformerEncoder(BaseEncoder):
                  concat_after: bool=False,
                  static_chunk_size: int=0,
                  use_dynamic_chunk: bool=False,
-                 global_cmvn: nn.Layer=None,
+                 global_cmvn: Optional[nn.Layer]=None,
                  use_dynamic_left_chunk: bool=False,
                  positionwise_conv_kernel_size: int=1,
                  macaron_style: bool=True,
@@ -466,8 +467,6 @@ class ConformerEncoder(BaseEncoder):
             causal (bool): whether to use causal convolution or not.
             cnn_module_norm (str): cnn conv norm type, Optional['batch_norm','layer_norm']
         """
-        assert check_argument_types()
         super().__init__(input_size, output_size, attention_heads, linear_units,
                          num_blocks, dropout_rate, positional_dropout_rate,
                          attention_dropout_rate, input_layer,
@@ -519,6 +518,7 @@ class ConformerEncoder(BaseEncoder):
 class SqueezeformerEncoder(nn.Layer):
+    @typechecked
     def __init__(self,
                  input_size: int,
                  encoder_dim: int=256,
@@ -541,7 +541,7 @@ class SqueezeformerEncoder(nn.Layer):
                  adaptive_scale: bool=True,
                  activation_type: str="swish",
                  init_weights: bool=True,
-                 global_cmvn: paddle.nn.Layer=None,
+                 global_cmvn: Optional[nn.Layer]=None,
                  normalize_before: bool=False,
                  use_dynamic_chunk: bool=False,
                  concat_after: bool=False,
@@ -572,7 +572,6 @@ class SqueezeformerEncoder(nn.Layer):
             init_weights (bool): Whether to initialize weights.
             causal (bool): whether to use causal convolution or not.
         """
-        assert check_argument_types()
         super().__init__()
         self.global_cmvn = global_cmvn
         self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \
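
The other recurring edit above is annotation tightening: parameters that default to None (global_cmvn here; spk_num, spk_embed_dim, tone_num and tone_embed_dim in fastspeech2 further down) change from a bare class annotation such as paddle.nn.Layer=None to Optional[nn.Layer]=None, spelling out that None is an accepted value rather than relying on the implicit-Optional convention that PEP 484 has since deprecated. A small sketch of the idea, with an illustrative stand-in class that is not part of the repo:

    from typing import Optional

    from typeguard import typechecked


    class GlobalCMVN:
        """Illustrative stand-in for the real normalization layer."""


    @typechecked
    def build_frontend(feat_dim: int, global_cmvn: Optional[GlobalCMVN]=None):
        # Spelling out Optional[...] makes the None default unambiguous for both
        # static checkers and typeguard's runtime check; the old implicit-Optional
        # spelling (global_cmvn: GlobalCMVN=None) is deprecated.
        return feat_dim, global_cmvn


    build_frontend(80)                            # ok: global_cmvn stays None
    build_frontend(80, global_cmvn=GlobalCMVN())  # ok: matching type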

@@ -19,7 +19,7 @@ from typing import Union
 import paddle
 from paddle.optimizer.lr import LRScheduler
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
 from paddlespeech.s2t.utils.dynamic_import import instance_class
@@ -57,13 +57,13 @@ class WarmupLR(LRScheduler):
     Note that the maximum lr equals to optimizer.lr in this scheduler.
     """
+    @typechecked
     def __init__(self,
                  warmup_steps: Union[int, float]=25000,
                  learning_rate=1.0,
                  last_epoch=-1,
                  verbose=False,
                  **kwargs):
-        assert check_argument_types()
         self.warmup_steps = warmup_steps
         super().__init__(learning_rate, last_epoch, verbose)

@@ -20,7 +20,7 @@ from typing import Tuple
 import numpy as np
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.models.diffsinger.fastspeech2midi import FastSpeech2MIDI
 from paddlespeech.t2s.modules.diffnet import DiffNet
@@ -40,6 +40,7 @@ class DiffSinger(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             # min and max spec for stretching before diffusion
@@ -157,7 +158,6 @@ class DiffSinger(nn.Layer):
             denoiser_params (Dict[str, Any]): Parameter dict for dinoiser module.
             diffusion_params (Dict[str, Any]): Parameter dict for diffusion module.
         """
-        assert check_argument_types()
         super().__init__()
         self.fs2 = FastSpeech2MIDI(
             idim=idim,
@@ -336,6 +336,7 @@ class DiffSingerInference(nn.Layer):
 class DiffusionLoss(nn.Layer):
     """Loss function module for Diffusion module on DiffSinger."""
+    @typechecked
     def __init__(self, use_masking: bool=True,
                  use_weighted_masking: bool=False):
         """Initialize feed-forward Transformer loss module.
@@ -345,7 +346,6 @@ class DiffusionLoss(nn.Layer):
             use_weighted_masking (bool):
                 Whether to weighted masking in loss calculation.
         """
-        assert check_argument_types()
         super().__init__()
         assert (use_masking != use_weighted_masking) or not use_masking

@@ -19,7 +19,7 @@ from typing import Tuple
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
 from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Loss
@@ -33,6 +33,7 @@ class FastSpeech2MIDI(FastSpeech2):
     """The Fastspeech2 module of DiffSinger.
     """
+    @typechecked
     def __init__(
             self,
             # fastspeech2 network structure related
@@ -57,7 +58,6 @@ class FastSpeech2MIDI(FastSpeech2):
                 is_slur_ids will be provided as the input
         """
-        assert check_argument_types()
         super().__init__(idim=idim, odim=odim, **fastspeech2_params)
         self.use_energy_pred = use_energy_pred
         self.use_postnet = use_postnet
@@ -495,6 +495,7 @@ class FastSpeech2MIDI(FastSpeech2):
 class FastSpeech2MIDILoss(FastSpeech2Loss):
     """Loss function module for DiffSinger."""
+    @typechecked
     def __init__(self, use_masking: bool=True,
                  use_weighted_masking: bool=False):
         """Initialize feed-forward Transformer loss module.
@@ -504,7 +505,6 @@ class FastSpeech2MIDILoss(FastSpeech2Loss):
             use_weighted_masking (bool):
                 Whether to weighted masking in loss calculation.
         """
-        assert check_argument_types()
         super().__init__(use_masking, use_weighted_masking)
     def forward(

@@ -15,6 +15,7 @@
 """Fastspeech2 related modules for paddle"""
 from typing import Dict
 from typing import List
+from typing import Optional
 from typing import Sequence
 from typing import Tuple
 from typing import Union
@@ -23,7 +24,7 @@ import numpy as np
 import paddle
 import paddle.nn.functional as F
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.modules.adversarial_loss.gradient_reversal import GradientReversalLayer
 from paddlespeech.t2s.modules.adversarial_loss.speaker_classifier import SpeakerClassifier
@@ -60,6 +61,7 @@ class FastSpeech2(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             # network structure related
@@ -131,12 +133,12 @@ class FastSpeech2(nn.Layer):
             pitch_embed_dropout: float=0.5,
             stop_gradient_from_pitch_predictor: bool=False,
             # spk emb
-            spk_num: int=None,
-            spk_embed_dim: int=None,
+            spk_num: Optional[int]=None,
+            spk_embed_dim: Optional[int]=None,
             spk_embed_integration_type: str="add",
             # tone emb
-            tone_num: int=None,
-            tone_embed_dim: int=None,
+            tone_num: Optional[int]=None,
+            tone_embed_dim: Optional[int]=None,
             tone_embed_integration_type: str="add",
             # training related
             init_type: str="xavier_uniform",
@@ -282,7 +284,6 @@ class FastSpeech2(nn.Layer):
                 The hidden layer dim of speaker classifier
         """
-        assert check_argument_types()
         super().__init__()
         # store hyperparameters
@@ -1070,6 +1071,7 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
 class FastSpeech2Loss(nn.Layer):
     """Loss function module for FastSpeech2."""
+    @typechecked
     def __init__(self, use_masking: bool=True,
                  use_weighted_masking: bool=False):
         """Initialize feed-forward Transformer loss module.
@@ -1079,7 +1081,6 @@ class FastSpeech2Loss(nn.Layer):
             use_weighted_masking (bool):
                 Whether to weighted masking in loss calculation.
         """
-        assert check_argument_types()
         super().__init__()
         assert (use_masking != use_weighted_masking) or not use_masking

@@ -28,7 +28,6 @@ from typing import Tuple
 import numpy as np
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
 from paddlespeech.t2s.models.hifigan import HiFiGANGenerator
 from paddlespeech.t2s.models.jets.alignments import AlignmentModule

@@ -24,7 +24,7 @@ from typing import Optional
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.models.hifigan import HiFiGANMultiPeriodDiscriminator
 from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleDiscriminator
@@ -64,6 +64,7 @@ class JETS(nn.Layer):
         Text-to-Speech`: https://arxiv.org/abs/2203.16852v1
     """
+    @typechecked
     def __init__(
             self,
             # generator related
@@ -225,7 +226,6 @@ class JETS(nn.Layer):
             cache_generator_outputs (bool):
                 Whether to cache generator outputs.
         """
-        assert check_argument_types()
         super().__init__()
         # define modules
@@ -279,8 +279,7 @@ class JETS(nn.Layer):
             lids: Optional[paddle.Tensor]=None,
             forward_generator: bool=True,
             use_alignment_module: bool=False,
-            **kwargs,
-    ) -> Dict[str, Any]:
+            **kwargs, ) -> Dict[str, Any]:
         """Perform generator forward.
         Args:
             text (Tensor):

@@ -21,7 +21,7 @@ from typing import Tuple
 import paddle
 import paddle.nn.functional as F
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.modules.nets_utils import initialize
 from paddlespeech.t2s.modules.nets_utils import make_pad_mask
@@ -44,6 +44,7 @@ class Tacotron2(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             # network structure related
@@ -67,7 +68,7 @@ class Tacotron2(nn.Layer):
             postnet_layers: int=5,
             postnet_chans: int=512,
             postnet_filts: int=5,
-            output_activation: str=None,
+            output_activation: Optional[str]=None,
             use_batch_norm: bool=True,
             use_concate: bool=True,
             use_residual: bool=False,
@@ -145,7 +146,6 @@ class Tacotron2(nn.Layer):
             zoneout_rate (float):
                 Zoneout rate.
         """
-        assert check_argument_types()
         super().__init__()
         # store hyperparameters

@@ -13,7 +13,9 @@
 # limitations under the License.
 # Modified from espnet(https://github.com/espnet/espnet)
 """Fastspeech2 related modules for paddle"""
+from optparse import Option
 from typing import Dict
+from typing import Optional
 from typing import Sequence
 from typing import Tuple
@@ -21,7 +23,7 @@ import numpy
 import paddle
 import paddle.nn.functional as F
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.modules.nets_utils import initialize
 from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
@@ -169,6 +171,7 @@ class TransformerTTS(nn.Layer):
             Number of layers to apply guided attention loss.
     """
+    @typechecked
     def __init__(
             self,
             # network structure related
@@ -198,7 +201,7 @@ class TransformerTTS(nn.Layer):
             encoder_concat_after: bool=False,
             decoder_concat_after: bool=False,
             reduction_factor: int=1,
-            spk_embed_dim: int=None,
+            spk_embed_dim: Optional[int]=None,
             spk_embed_integration_type: str="add",
             use_gst: bool=False,
             gst_tokens: int=10,
@@ -227,7 +230,7 @@ class TransformerTTS(nn.Layer):
             num_heads_applied_guided_attn: int=2,
             num_layers_applied_guided_attn: int=2, ):
         """Initialize Transformer module."""
-        assert check_argument_types()
         super().__init__()
         # store hyperparameters

@@ -20,7 +20,7 @@ from typing import Optional
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.models.hifigan import HiFiGANMultiPeriodDiscriminator
 from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleDiscriminator
@@ -60,6 +60,7 @@ class VITS(nn.Layer):
         Text-to-Speech`: https://arxiv.org/abs/2006.04558
     """
+    @typechecked
     def __init__(
             self,
             # generator related
@@ -181,7 +182,6 @@ class VITS(nn.Layer):
             cache_generator_outputs (bool):
                 Whether to cache generator outputs.
         """
-        assert check_argument_types()
         super().__init__()
         # define modules
@@ -504,8 +504,9 @@ class VITS(nn.Layer):
     def reset_parameters(self):
         def _reset_parameters(module):
-            if isinstance(module,
-                          (nn.Conv1D, nn.Conv1DTranspose, nn.Conv2D, nn.Conv2DTranspose)):
+            if isinstance(
+                    module,
+                (nn.Conv1D, nn.Conv1DTranspose, nn.Conv2D, nn.Conv2DTranspose)):
                 kaiming_uniform_(module.weight, a=math.sqrt(5))
                 if module.bias is not None:
                     fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
@@ -513,8 +514,9 @@ class VITS(nn.Layer):
                     bound = 1 / math.sqrt(fan_in)
                     uniform_(module.bias, -bound, bound)
-            if isinstance(module,
-                          (nn.BatchNorm1D, nn.BatchNorm2D, nn.GroupNorm, nn.LayerNorm)):
+            if isinstance(
+                    module,
+                (nn.BatchNorm1D, nn.BatchNorm2D, nn.GroupNorm, nn.LayerNorm)):
                 ones_(module.weight)
                 zeros_(module.bias)
@@ -533,13 +535,13 @@ class VITS(nn.Layer):
         self.apply(_reset_parameters)
 class VITSInference(nn.Layer):
     def __init__(self, model):
         super().__init__()
         self.acoustic_model = model
     def forward(self, text, sids=None):
-        out = self.acoustic_model.inference(
-            text, sids=sids)
+        out = self.acoustic_model.inference(text, sids=sids)
         wav = out['wav']
         return wav

@@ -14,16 +14,16 @@
 # Modified from Cross-Lingual-Voice-Cloning(https://github.com/deterministic-algorithms-lab/Cross-Lingual-Voice-Cloning)
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 class SpeakerClassifier(nn.Layer):
+    @typechecked
     def __init__(
             self,
             idim: int,
             hidden_sc_dim: int,
             spk_num: int, ):
-        assert check_argument_types()
         super().__init__()
         # store hyperparameters
         self.idim = idim

@@ -21,7 +21,7 @@ from paddle import nn
 from paddle.nn import functional as F
 from scipy import signal
 from scipy.stats import betabinom
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
 from paddlespeech.t2s.modules.predictor.duration_predictor import (
@@ -1137,6 +1137,7 @@ class MLMLoss(nn.Layer):
 class VarianceLoss(nn.Layer):
+    @typechecked
     def __init__(self, use_masking: bool=True,
                  use_weighted_masking: bool=False):
         """Initialize JETS variance loss module.
@@ -1147,7 +1148,6 @@ class VarianceLoss(nn.Layer):
                 calculation.
         """
-        assert check_argument_types()
         super().__init__()
         assert (use_masking != use_weighted_masking) or not use_masking

@@ -18,7 +18,7 @@ from typing import Tuple
 import numpy as np
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.utils.initialize import _calculate_fan_in_and_fan_out
 from paddlespeech.utils.initialize import kaiming_uniform_
@@ -301,6 +301,7 @@ def make_non_pad_mask(lengths, xs=None, length_dim=-1):
     return paddle.logical_not(make_pad_mask(lengths, xs, length_dim))
+@typechecked
 def initialize(model: nn.Layer, init: str):
     """Initialize weights of a neural network module.
@@ -314,8 +315,6 @@ def initialize(model: nn.Layer, init: str):
         init (str):
             Method of initialization.
     """
-    assert check_argument_types()
     if init == "xavier_uniform":
         nn.initializer.set_global_initializer(nn.initializer.XavierUniform(),
                                                nn.initializer.Constant())

@@ -15,7 +15,7 @@
 """Variance predictor related modules."""
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.modules.layer_norm import LayerNorm
 from paddlespeech.t2s.modules.masked_fill import masked_fill
@@ -32,6 +32,7 @@ class VariancePredictor(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             idim: int,
@@ -54,7 +55,6 @@ class VariancePredictor(nn.Layer):
             dropout_rate (float, optional):
                 Dropout rate.
         """
-        assert check_argument_types()
         super().__init__()
         self.conv = nn.LayerList()
         for idx in range(n_layers):
@@ -96,7 +96,7 @@ class VariancePredictor(nn.Layer):
             xs = f(xs)
         # (B, Tmax, 1)
         xs = self.linear(xs.transpose([0, 2, 1]))
         if x_masks is not None:
             xs = masked_fill(xs, x_masks, 0.0)
         return xs

@@ -17,7 +17,7 @@ from typing import Sequence
 import paddle
 from paddle import nn
-from typeguard import check_argument_types
+from typeguard import typechecked
 from paddlespeech.t2s.modules.transformer.attention import MultiHeadedAttention as BaseMultiHeadedAttention
@@ -58,6 +58,7 @@ class StyleEncoder(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             idim: int=80,
@@ -71,7 +72,6 @@ class StyleEncoder(nn.Layer):
             gru_layers: int=1,
             gru_units: int=128, ):
         """Initilize global style encoder module."""
-        assert check_argument_types()
         super().__init__()
         self.ref_enc = ReferenceEncoder(
@@ -132,6 +132,7 @@ class ReferenceEncoder(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             idim=80,
@@ -142,7 +143,6 @@ class ReferenceEncoder(nn.Layer):
             gru_layers: int=1,
             gru_units: int=128, ):
         """Initilize reference encoder module."""
-        assert check_argument_types()
         super().__init__()
         # check hyperparameters are valid
@@ -232,6 +232,7 @@ class StyleTokenLayer(nn.Layer):
     """
+    @typechecked
     def __init__(
             self,
             ref_embed_dim: int=128,
@@ -240,7 +241,6 @@ class StyleTokenLayer(nn.Layer):
             gst_heads: int=4,
             dropout_rate: float=0.0, ):
         """Initilize style token layer module."""
-        assert check_argument_types()
         super().__init__()
         gst_embs = paddle.randn(shape=[gst_tokens, gst_token_dim // gst_heads])

@@ -69,8 +69,8 @@ base = [
     "soundfile",
     "textgrid",
     "timer",
-    "ToJyutping==0.2.1",
-    "typeguard==2.13.3",
+    "ToJyutping",
+    "typeguard",
     "webrtcvad",
     "yacs~=0.1.8",
     "zhon",
