###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# The source code uses 24 kHz when loading but 16 kHz when extracting mel;
# later, both loading and mel extraction should be changed to 24 kHz.
fs: 16000
n_fft: 2048
n_shift: 300
# Window length in samples: 50 ms at 24 kHz (75 ms at the 16 kHz set above).
# If set to null, it will be the same as fft_size.
win_length: 1200
window: "hann"  # Window function.

fmin: 0  # Minimum frequency of Mel basis.
fmax: 8000  # Maximum frequency of Mel basis. sr // 2
n_mels: 80
# only for StarGANv2 VC
norm: null  # None here
htk: true
power: 2.0

###########################################################
#                      MODEL SETTING                      #
###########################################################
generator_params:
  dim_in: 64
  style_dim: 64
  max_conv_dim: 512
  w_hpf: 0
  F0_channel: 256

mapping_network_params:
  num_domains: 20  # num of speakers in StarGANv2
  latent_dim: 16
  style_dim: 64  # same as style_dim in generator_params
  hidden_dim: 512  # same as max_conv_dim in generator_params

style_encoder_params:
  dim_in: 64  # same as dim_in in generator_params
  style_dim: 64  # same as style_dim in generator_params
  num_domains: 20  # same as num_domains in mapping_network_params
  max_conv_dim: 512  # same as max_conv_dim in generator_params

discriminator_params:
  dim_in: 64  # same as dim_in in generator_params
  num_domains: 20  # same as num_domains in mapping_network_params
  max_conv_dim: 512  # same as max_conv_dim in generator_params
  repeat_num: 4

asr_params:
  input_dim: 80
  hidden_dim: 256
  n_token: 80
  token_embedding_dim: 256

###########################################################
#                ADVERSARIAL LOSS SETTING                 #
###########################################################
loss_params:
  # Weights applied to the generator loss terms.
  g_loss:
    lambda_sty: 1.0
    lambda_cyc: 5.0
    lambda_ds: 1.0
    lambda_norm: 1.0
    lambda_asr: 10.0
    lambda_f0: 5.0
    lambda_f0_sty: 0.1
    lambda_adv: 2.0
    lambda_adv_cls: 0.5
    norm_bias: 0.5
  # Weights applied to the discriminator loss terms.
  d_loss:
    lambda_reg: 1.0
    lambda_adv_cls: 0.1
    lambda_con_reg: 10.0

  # NOTE(review): these two keys are placed under loss_params based on the
  # section they appear in; confirm the nesting against the training script.
  adv_cls_epoch: 50
  con_reg_epoch: 30

###########################################################
#                   DATA LOADER SETTING                   #
###########################################################
batch_size: 5  # Batch size.
num_workers: 2  # Number of workers in DataLoader.
max_mel_length: 192

###########################################################
#              OPTIMIZER & SCHEDULER SETTING              #
###########################################################
generator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
generator_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4

style_encoder_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
style_encoder_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4

# The mapping network uses a learning rate of 2.0e-6, while the generator,
# style encoder and discriminator use 2.0e-4.
mapping_network_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
mapping_network_scheduler_params:
  max_learning_rate: 2.0e-6
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-6

discriminator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
discriminator_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4

###########################################################
#                    TRAINING SETTING                     #
###########################################################
max_epoch: 150
num_snapshots: 5
seed: 1