parent
206d9e5663
commit
0207ac00bb
@ -1,109 +0,0 @@
|
|||||||
###########################################################
|
|
||||||
# FEATURE EXTRACTION SETTING #
|
|
||||||
###########################################################
|
|
||||||
|
|
||||||
fs: 24000 # Sampling rate (Hz).
|
|
||||||
n_fft: 2048 # FFT size (samples).
|
|
||||||
n_shift: 300 # Hop size (samples). 12.5ms
|
|
||||||
win_length: 1200 # Window length (samples). 50ms
|
|
||||||
# If win_length is set to null, it will be the same as n_fft.
|
|
||||||
window: "hann" # Window function.
|
|
||||||
|
|
||||||
# Only used for feats_type != raw
|
|
||||||
|
|
||||||
fmin: 80 # Minimum frequency of Mel basis.
|
|
||||||
fmax: 7600 # Maximum frequency of Mel basis.
|
|
||||||
n_mels: 80 # The number of mel basis.
|
|
||||||
|
|
||||||
# Only used for the model using pitch features (e.g. FastSpeech2)
|
|
||||||
f0min: 80 # Minimum f0 for pitch extraction.
|
|
||||||
f0max: 400 # Maximum f0 for pitch extraction.
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# DATA SETTING #
|
|
||||||
###########################################################
|
|
||||||
batch_size: 64
|
|
||||||
num_workers: 4
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# MODEL SETTING #
|
|
||||||
###########################################################
|
|
||||||
model:
|
|
||||||
adim: 384 # attention dimension
|
|
||||||
aheads: 2 # number of attention heads
|
|
||||||
elayers: 4 # number of encoder layers
|
|
||||||
eunits: 1536 # number of encoder ff units
|
|
||||||
dlayers: 4 # number of decoder layers
|
|
||||||
dunits: 1536 # number of decoder ff units
|
|
||||||
positionwise_layer_type: conv1d # type of position-wise layer
|
|
||||||
positionwise_conv_kernel_size: 3 # kernel size of position-wise conv layer
|
|
||||||
duration_predictor_layers: 2 # number of layers of duration predictor
|
|
||||||
duration_predictor_chans: 256 # number of channels of duration predictor
|
|
||||||
duration_predictor_kernel_size: 3 # filter size of duration predictor
|
|
||||||
postnet_layers: 5 # number of layers of postnet
|
|
||||||
postnet_filts: 5 # filter size of conv layers in postnet
|
|
||||||
postnet_chans: 256 # number of channels of conv layers in postnet
|
|
||||||
encoder_normalize_before: True # whether to perform layer normalization before the input
|
|
||||||
decoder_normalize_before: True # whether to perform layer normalization before the input
|
|
||||||
reduction_factor: 1 # reduction factor
|
|
||||||
encoder_type: conformer # encoder type
|
|
||||||
decoder_type: conformer # decoder type
|
|
||||||
conformer_pos_enc_layer_type: rel_pos # conformer positional encoding type
|
|
||||||
conformer_self_attn_layer_type: rel_selfattn # conformer self-attention type
|
|
||||||
conformer_activation_type: swish # conformer activation type
|
|
||||||
use_macaron_style_in_conformer: True # whether to use macaron style in conformer
|
|
||||||
use_cnn_in_conformer: True # whether to use CNN in conformer
|
|
||||||
conformer_enc_kernel_size: 7 # kernel size in CNN module of conformer-based encoder
|
|
||||||
conformer_dec_kernel_size: 31 # kernel size in CNN module of conformer-based decoder
|
|
||||||
init_type: xavier_uniform # initialization type
|
|
||||||
transformer_enc_dropout_rate: 0.2 # dropout rate for transformer encoder layer
|
|
||||||
transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
|
|
||||||
transformer_enc_attn_dropout_rate: 0.2 # dropout rate for transformer encoder attention layer
|
|
||||||
transformer_dec_dropout_rate: 0.2 # dropout rate for transformer decoder layer
|
|
||||||
transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
|
|
||||||
transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer
|
|
||||||
pitch_predictor_layers: 5 # number of conv layers in pitch predictor
|
|
||||||
pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor
|
|
||||||
pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor
|
|
||||||
pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor
|
|
||||||
pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch
|
|
||||||
pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch
|
|
||||||
stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder
|
|
||||||
energy_predictor_layers: 2 # number of conv layers in energy predictor
|
|
||||||
energy_predictor_chans: 256 # number of channels of conv layers in energy predictor
|
|
||||||
energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor
|
|
||||||
energy_predictor_dropout: 0.5 # dropout rate in energy predictor
|
|
||||||
energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy
|
|
||||||
energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy
|
|
||||||
stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# UPDATER SETTING #
|
|
||||||
###########################################################
|
|
||||||
updater:
|
|
||||||
use_masking: True # whether to apply masking for padded part in loss calculation
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# OPTIMIZER SETTING #
|
|
||||||
###########################################################
|
|
||||||
optimizer:
|
|
||||||
optim: adam # optimizer type
|
|
||||||
learning_rate: 0.001 # learning rate
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# TRAINING SETTING #
|
|
||||||
###########################################################
|
|
||||||
max_epoch: 1000
|
|
||||||
num_snapshots: 5
|
|
||||||
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# OTHER SETTING #
|
|
||||||
###########################################################
|
|
||||||
seed: 10086
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue