# Audio feature extraction parameters (STFT and mel-spectrogram).
fs: 22050  # Hz, sample rate
n_fft: 1024  # FFT size (samples).
win_length: 1024  # Window length (samples). 46.4ms
n_shift: 256  # Hop size (samples). 11.6ms
fmin: 0  # Hz, min frequency when converting to mel
fmax: 8000  # Hz, max frequency when converting to mel
n_mels: 80  # mel bands
window: "hann"  # Window function.

###########################################################
#                      DATA SETTING                       #
###########################################################
batch_size: 16
num_workers: 2

###########################################################
#                    TTS MODEL SETTING                    #
###########################################################
tts: transformertts  # model architecture
model:  # keyword arguments for the selected model
  embed_dim: 0  # embedding dimension in encoder prenet
  # number of conv layers in encoder prenet;
  # if set to 0, no encoder prenet will be used
  eprenet_conv_layers: 0
  eprenet_conv_filts: 0  # filter size of conv layers in encoder prenet
  eprenet_conv_chans: 0  # number of channels of conv layers in encoder prenet
  dprenet_layers: 2  # number of layers in decoder prenet
  dprenet_units: 256  # number of units in decoder prenet
  adim: 512  # attention dimension
  aheads: 8  # number of attention heads
  elayers: 6  # number of encoder layers
  eunits: 1024  # number of encoder ff units
  dlayers: 6  # number of decoder layers
  dunits: 1024  # number of decoder ff units
  positionwise_layer_type: conv1d  # type of position-wise layer
  positionwise_conv_kernel_size: 1  # kernel size of position-wise conv layer
  postnet_layers: 5  # number of layers of postnet
  postnet_filts: 5  # filter size of conv layers in postnet
  postnet_chans: 256  # number of channels of conv layers in postnet
  use_scaled_pos_enc: true  # whether to use scaled positional encoding
  encoder_normalize_before: true  # whether to perform layer normalization before the input
  decoder_normalize_before: true  # whether to perform layer normalization before the input
  reduction_factor: 1  # reduction factor
  init_type: xavier_uniform  # initialization type
  init_enc_alpha: 1.0  # initial value of alpha of encoder scaled position encoding
  init_dec_alpha: 1.0  # initial value of alpha of decoder scaled position encoding
  eprenet_dropout_rate: 0.0  # dropout rate for encoder prenet
  dprenet_dropout_rate: 0.5  # dropout rate for decoder prenet
  postnet_dropout_rate: 0.5  # dropout rate for postnet
  transformer_enc_dropout_rate: 0.1  # dropout rate for transformer encoder layer
  transformer_enc_positional_dropout_rate: 0.1  # dropout rate for transformer encoder positional encoding
  transformer_enc_attn_dropout_rate: 0.1  # dropout rate for transformer encoder attention layer
  transformer_dec_dropout_rate: 0.1  # dropout rate for transformer decoder layer
  transformer_dec_positional_dropout_rate: 0.1  # dropout rate for transformer decoder positional encoding
  transformer_dec_attn_dropout_rate: 0.1  # dropout rate for transformer decoder attention layer
  transformer_enc_dec_attn_dropout_rate: 0.1  # dropout rate for transformer encoder-decoder attention layer
  num_heads_applied_guided_attn: 2  # number of heads to apply guided attention loss
  num_layers_applied_guided_attn: 2  # number of layers to apply guided attention loss

###########################################################
#                     UPDATER SETTING                     #
###########################################################
updater:
  use_masking: true  # whether to apply masking for padded part in loss calculation
  loss_type: L1
  use_guided_attn_loss: true  # whether to use guided attention loss
  guided_attn_loss_sigma: 0.4  # sigma in guided attention loss
  guided_attn_loss_lambda: 10.0  # lambda in guided attention loss
  modules_applied_guided_attn: ["encoder-decoder"]  # modules to apply guided attention loss
  bce_pos_weight: 5.0  # weight of positive sample in binary cross entropy calculation

###########################################################
#              OPTIMIZER & SCHEDULER SETTING              #
###########################################################
optimizer:
  optim: adam  # optimizer type
  learning_rate: 0.001  # learning rate

###########################################################
#                    TRAINING SETTING                     #
###########################################################
max_epoch: 500
num_snapshots: 5

###########################################################
#                      OTHER SETTING                      #
###########################################################
seed: 10086