# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# PaddleSpeech/examples/ljspeech/tts1/conf/default.yaml
#
# 92 lines
# 5.2 KiB

# Audio feature extraction parameters.
fs: 22050  # sample rate (Hz)
n_fft: 1024  # FFT size (samples)
win_length: 1024  # window length (samples), 46.4 ms at 22050 Hz
n_shift: 256  # hop size (samples), 11.6 ms at 22050 Hz
fmin: 0  # minimum frequency when converting to mel (Hz)
fmax: 8000  # maximum frequency when converting to mel (Hz)
n_mels: 80  # number of mel bands
window: "hann"  # window function
###########################################################
#                      DATA SETTING                       #
###########################################################
# presumably the number of utterances per training batch -- confirm against the data loader
batch_size: 16
# presumably the number of data-loading worker processes -- confirm against the data loader
num_workers: 2
##########################################################
#                   TTS MODEL SETTING                    #
##########################################################
tts: transformertts  # model architecture

# Keyword arguments for the selected model.
# Every key below must stay nested under `model`: if the keys sit at column 0,
# `model` parses as null and the arguments become unrelated top-level settings.
model:
  # --- encoder / decoder prenet ---
  embed_dim: 0  # embedding dimension in encoder prenet
  # number of conv layers in encoder prenet
  # (if set to 0, no encoder prenet will be used)
  eprenet_conv_layers: 0
  eprenet_conv_filts: 0  # filter size of conv layers in encoder prenet
  eprenet_conv_chans: 0  # number of channels of conv layers in encoder prenet
  dprenet_layers: 2  # number of layers in decoder prenet
  dprenet_units: 256  # number of units in decoder prenet

  # --- transformer encoder / decoder ---
  adim: 512  # attention dimension
  aheads: 8  # number of attention heads
  elayers: 6  # number of encoder layers
  eunits: 1024  # number of encoder ff units
  dlayers: 6  # number of decoder layers
  dunits: 1024  # number of decoder ff units
  positionwise_layer_type: conv1d  # type of position-wise layer
  positionwise_conv_kernel_size: 1  # kernel size of position-wise conv layer

  # --- postnet ---
  postnet_layers: 5  # number of layers of postnet
  postnet_filts: 5  # filter size of conv layers in postnet
  postnet_chans: 256  # number of channels of conv layers in postnet

  # --- normalization, initialization, positional encoding ---
  use_scaled_pos_enc: True  # whether to use scaled positional encoding
  encoder_normalize_before: True  # whether to perform layer normalization before the input
  decoder_normalize_before: True  # whether to perform layer normalization before the input
  reduction_factor: 1  # reduction factor
  init_type: xavier_uniform  # initialization type
  init_enc_alpha: 1.0  # initial value of alpha of encoder scaled position encoding
  init_dec_alpha: 1.0  # initial value of alpha of decoder scaled position encoding

  # --- dropout ---
  eprenet_dropout_rate: 0.0  # dropout rate for encoder prenet
  dprenet_dropout_rate: 0.5  # dropout rate for decoder prenet
  postnet_dropout_rate: 0.5  # dropout rate for postnet
  transformer_enc_dropout_rate: 0.1  # dropout rate for transformer encoder layer
  transformer_enc_positional_dropout_rate: 0.1  # dropout rate for transformer encoder positional encoding
  transformer_enc_attn_dropout_rate: 0.1  # dropout rate for transformer encoder attention layer
  transformer_dec_dropout_rate: 0.1  # dropout rate for transformer decoder layer
  transformer_dec_positional_dropout_rate: 0.1  # dropout rate for transformer decoder positional encoding
  transformer_dec_attn_dropout_rate: 0.1  # dropout rate for transformer decoder attention layer
  transformer_enc_dec_attn_dropout_rate: 0.1  # dropout rate for transformer encoder-decoder attention layer

  # --- guided attention ---
  num_heads_applied_guided_attn: 2  # number of heads to apply guided attention loss
  num_layers_applied_guided_attn: 2  # number of layers to apply guided attention loss
###########################################################
#                     UPDATER SETTING                     #
###########################################################
# Loss-related settings. The keys must stay nested under `updater`; at column 0
# `updater` parses as null and the keys become unrelated top-level settings.
updater:
  use_masking: True  # whether to apply masking for padded part in loss calculation
  loss_type: L1  # NOTE(review): presumably selects the spectrogram loss function -- confirm accepted values
  use_guided_attn_loss: True  # whether to use guided attention loss
  guided_attn_loss_sigma: 0.4  # sigma in guided attention loss
  guided_attn_loss_lambda: 10.0  # lambda in guided attention loss
  modules_applied_guided_attn: ["encoder-decoder"]  # modules to apply guided attention loss
  bce_pos_weight: 5.0  # weight of positive sample in binary cross entropy calculation
##########################################################
#             OPTIMIZER & SCHEDULER SETTING              #
##########################################################
# Optimizer settings. The keys must stay nested under `optimizer`; at column 0
# `optimizer` parses as null and the keys become unrelated top-level settings.
# NOTE(review): the banner mentions a scheduler, but no scheduler keys are
# defined in this section.
optimizer:
  optim: adam  # optimizer type
  learning_rate: 0.001  # learning rate
###########################################################
#                    TRAINING SETTING                     #
###########################################################
# presumably the maximum number of training epochs -- confirm against the trainer
max_epoch: 500
# presumably the number of checkpoints (snapshots) to keep -- confirm against the trainer
num_snapshots: 5
###########################################################
#                      OTHER SETTING                      #
###########################################################
# random seed; presumably used to make runs reproducible -- confirm where it is consumed
seed: 10086