You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/examples/vctk/vc3/conf/default.yaml

135 lines
4.3 KiB

###########################################################
# FEATURE EXTRACTION SETTING #
###########################################################
# The source code uses 24k when loading audio but 16k when extracting mel;
# later, both loading and mel extraction should be changed to 24k.
fs: 16000
n_fft: 2048
n_shift: 300
# Window length (in samples). 1200 samples is 50 ms at 24 kHz; at the
# configured fs of 16000 it is 75 ms.
# If set to null, it will be the same as fft_size.
win_length: 1200
window: "hann"  # Window function.
fmin: 0  # Minimum frequency of Mel basis.
fmax: 8000  # Maximum frequency of Mel basis. sr // 2
n_mels: 80
# only for StarGANv2 VC
norm: null  # None here (explicit null instead of a bare empty value)
htk: true
power: 2.0
###########################################################
# MODEL SETTING #
###########################################################
# Generator hyper-parameters. Other *_params sections in this file annotate
# their values as "same as ... in generator_params", so keep them in sync.
generator_params:
  dim_in: 64
  style_dim: 64
  max_conv_dim: 512
  w_hpf: 0
  F0_channel: 256
# Mapping-network hyper-parameters.
mapping_network_params:
  num_domains: 20  # num of speakers in StarGANv2
  latent_dim: 16
  style_dim: 64  # same as style_dim in generator_params
  hidden_dim: 512  # same as max_conv_dim in generator_params
# Style-encoder hyper-parameters.
style_encoder_params:
  dim_in: 64  # same as dim_in in generator_params
  style_dim: 64  # same as style_dim in generator_params
  num_domains: 20  # same as num_domains in mapping_network_params
  max_conv_dim: 512  # same as max_conv_dim in generator_params
# Discriminator hyper-parameters.
discriminator_params:
  dim_in: 64  # same as dim_in in generator_params
  num_domains: 20  # same as num_domains in mapping_network_params
  max_conv_dim: 512  # same as max_conv_dim in generator_params
  repeat_num: 4
# ASR model hyper-parameters.
# NOTE(review): input_dim equals n_mels (80) in this file; presumably the ASR
# model consumes the mel features, so the two must match - confirm.
asr_params:
  input_dim: 80
  hidden_dim: 256
  n_token: 80
  token_embedding_dim: 256
###########################################################
# ADVERSARIAL LOSS SETTING #
###########################################################
# Loss weights, split into generator (g_loss) and discriminator (d_loss)
# groups. Both groups define their own lambda_adv_cls, so they must stay
# nested; flattening them would produce a duplicate key.
loss_params:
  g_loss:
    lambda_sty: 1.0
    lambda_cyc: 5.0
    lambda_ds: 1.0
    lambda_norm: 1.0
    lambda_asr: 10.0
    lambda_f0: 5.0
    lambda_f0_sty: 0.1
    lambda_adv: 2.0
    lambda_adv_cls: 0.5
    norm_bias: 0.5
  d_loss:
    lambda_reg: 1.0
    lambda_adv_cls: 0.1
    lambda_con_reg: 10.0
  # NOTE(review): the nesting level of the two *_epoch keys was lost in this
  # copy of the file. They are placed directly under loss_params here;
  # confirm against how the training script reads them.
  adv_cls_epoch: 50
  con_reg_epoch: 30
###########################################################
# DATA LOADER SETTING #
###########################################################
# Batch size.
batch_size: 5
# Number of workers in DataLoader.
num_workers: 2
# NOTE(review): presumably the maximum number of mel frames per training
# sample - confirm against the data pipeline.
max_mel_length: 192
###########################################################
# OPTIMIZER & SCHEDULER SETTING #
###########################################################
# Optimizer settings for the generator.
generator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
# Learning-rate schedule for the generator.
# NOTE(review): max_learning_rate equals end_learning_rate and divide_factor
# is 1, which looks like a deliberately constant learning rate - confirm.
generator_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4
# Optimizer settings for the style encoder.
style_encoder_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
# Learning-rate schedule for the style encoder.
# NOTE(review): max_learning_rate equals end_learning_rate and divide_factor
# is 1, which looks like a deliberately constant learning rate - confirm.
style_encoder_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4
# Optimizer settings for the mapping network.
mapping_network_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
# Learning-rate schedule for the mapping network.
# NOTE(review): the rate here (2.0e-6) is 100x smaller than the 2.0e-4 used by
# the other networks in this file; this looks deliberate - confirm before
# "aligning" it with the others.
mapping_network_scheduler_params:
  max_learning_rate: 2.0e-6
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-6
# Optimizer settings for the discriminator.
discriminator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1.0e-4
  epsilon: 1.0e-9
# Learning-rate schedule for the discriminator.
# NOTE(review): max_learning_rate equals end_learning_rate and divide_factor
# is 1, which looks like a deliberately constant learning rate - confirm.
discriminator_scheduler_params:
  max_learning_rate: 2.0e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000  # train_max_steps
  end_learning_rate: 2.0e-4
###########################################################
# TRAINING SETTING #
###########################################################
# NOTE(review): presumably the number of epochs to train for - confirm.
max_epoch: 150
# NOTE(review): presumably how many checkpoints are retained - confirm.
num_snapshots: 5
# Random seed.
seed: 1