# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from yacs.config import CfgNode as CN _C = CN() _C.data = CN( dict( batch_size=32, # batch size valid_size=64, # the first N examples are reserved for validation sample_rate=22050, # Hz, sample rate n_fft=1024, # fft frame size win_length=1024, # window size hop_length=256, # hop size between ajacent frame fmax=8000, # Hz, max frequency when converting to mel fmin=0, # Hz, min frequency when converting to mel n_mels=80, # mel bands padding_idx=0, # text embedding's padding index )) _C.model = CN( dict( vocab_size=37, # set this according to the frontend's vocab size n_tones=None, reduction_factor=1, # reduction factor d_encoder=512, # embedding & encoder's internal size encoder_conv_layers=3, # number of conv layer in tacotron2 encoder encoder_kernel_size=5, # kernel size of conv layers in tacotron2 encoder d_prenet=256, # hidden size of decoder prenet d_attention_rnn=1024, # hidden size of the first rnn layer in tacotron2 decoder d_decoder_rnn=1024, # hidden size of the second rnn layer in tacotron2 decoder d_attention=128, # hidden size of decoder location linear layer attention_filters=32, # number of filter in decoder location conv layer attention_kernel_size=31, # kernel size of decoder location conv layer d_postnet=512, # hidden size of decoder postnet postnet_kernel_size=5, # kernel size of conv layers in postnet postnet_conv_layers=5, # number of conv layer in decoder postnet p_encoder_dropout=0.5, # droput probability in encoder p_prenet_dropout=0.5, # droput probability in decoder prenet p_attention_dropout=0.1, # droput probability of first rnn layer in decoder p_decoder_dropout=0.1, # droput probability of second rnn layer in decoder p_postnet_dropout=0.5, # droput probability in decoder postnet d_global_condition=None, use_stop_token=True, # wherther to use binary classifier to predict when to stop use_guided_attention_loss=False, # whether to use guided attention loss guided_attention_loss_sigma=0.2 # sigma in guided attention loss )) _C.training = CN( dict( lr=1e-3, # learning rate weight_decay=1e-6, # the coeff of weight decay grad_clip_thresh=1.0, # the clip norm of grad clip. plot_interval=1000, # plot attention and spectrogram valid_interval=1000, # validation save_interval=1000, # checkpoint max_iteration=500000, # max iteration to train )) def get_cfg_defaults(): """Get a yacs CfgNode object with default values for my_project.""" # Return a clone so that the defaults will not be altered # This is for the "local variable" use pattern return _C.clone()