---
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# The original source code loads audio at 24k but extracts mel features
# at 16k; later, both loading and mel extraction should be changed to 24k.
fs: 16000
n_fft: 2048
n_shift: 300
# Window length (in samples).
# If set to null, it will be the same as fft_size.
# NOTE(review): the original note says "50ms", which holds for 1200 samples
# at 24 kHz; at the fs of 16000 set above, 1200 samples is 75 ms.
win_length: 1200
window: "hann"  # Window function.

fmin: 0     # Minimum frequency of Mel basis.
fmax: 8000  # Maximum frequency of Mel basis. sr // 2
n_mels: 80
# only for StarGANv2 VC
norm: null  # None here (explicit null instead of a bare empty value)
htk: true
power: 2.0

###########################################################
#                      MODEL SETTING                      #
###########################################################
# Each *_params mapping groups the settings of one sub-network.
# Values marked "same as ..." must be kept in sync by hand.
generator_params:
  dim_in: 64
  style_dim: 64
  max_conv_dim: 512
  w_hpf: 0
  F0_channel: 256
mapping_network_params:
  num_domains: 20  # num of speakers in StarGANv2
  latent_dim: 16
  style_dim: 64    # same as style_dim in generator_params
  hidden_dim: 512  # same as max_conv_dim in generator_params
style_encoder_params:
  dim_in: 64         # same as dim_in in generator_params
  style_dim: 64      # same as style_dim in generator_params
  num_domains: 20    # same as num_domains in mapping_network_params
  max_conv_dim: 512  # same as max_conv_dim in generator_params
discriminator_params:
  dim_in: 64         # same as dim_in in generator_params
  num_domains: 20    # same as num_domains in mapping_network_params
  max_conv_dim: 512  # same as max_conv_dim in generator_params
  repeat_num: 4
asr_params:
  input_dim: 80
  hidden_dim: 256
  n_token: 80
  token_embedding_dim: 256

###########################################################
#                 ADVERSARIAL LOSS SETTING                #
###########################################################
loss_params:
  # Weights of the generator loss terms.
  g_loss:
    lambda_sty: 1.0
    lambda_cyc: 5.0
    lambda_ds: 1.0
    lambda_norm: 1.0
    lambda_asr: 10.0
    lambda_f0: 5.0
    lambda_f0_sty: 0.1
    lambda_adv: 2.0
    lambda_adv_cls: 0.5
    norm_bias: 0.5
  # Weights of the discriminator loss terms.
  d_loss:
    lambda_reg: 1.0
    lambda_adv_cls: 0.1
    lambda_con_reg: 10.0

  # NOTE(review): indentation was lost in the copy this file was restored
  # from; these two keys are placed directly under loss_params (siblings of
  # g_loss / d_loss) -- confirm against the code that reads this config.
  adv_cls_epoch: 50
  con_reg_epoch: 30

###########################################################
#                   DATA LOADER SETTING                   #
###########################################################
batch_size: 5   # Batch size.
num_workers: 2  # Number of workers in DataLoader.
max_mel_length: 192

###########################################################
#              OPTIMIZER & SCHEDULER SETTING              #
###########################################################
# One optimizer mapping and one scheduler mapping per sub-network.
# In every scheduler below, max_learning_rate equals end_learning_rate.
generator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
generator_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4
style_encoder_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
style_encoder_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4
mapping_network_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
# The mapping network's learning rate (2.0e-6) is 100x smaller than the
# 2.0e-4 used by the other three sub-networks.
mapping_network_scheduler_params:
  max_learning_rate: 2.0e-6
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-6
discriminator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
discriminator_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 150
num_snapshots: 5
seed: 1