# PaddleSpeech/examples/voxceleb/sv0/conf/ecapa_tdnn.yaml

###########################################
#                  Data                   #
###########################################
augment: true
batch_size: 32
num_workers: 2
# Speaker counts: vox1 = 1211, vox2 = 5994, vox1 + vox2 = 7205
# (test speakers: 41).
num_speakers: 7205
shuffle: true
skip_prep: false
split_ratio: 0.9
# Chunk duration, in seconds.
chunk_duration: 3.0
random_chunk: true
verification_file: data/vox1/veri_test2.txt

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# Only fbank features are supported at the moment.
# Sample rate.
sr: 16000
n_mels: 80
# 25 ms window at a sample rate of 16000: 25 * 16000 / 1000 = 400.
window_size: 400
# 10 ms hop at a sample rate of 16000: 10 * 16000 / 1000 = 160.
hop_size: 160

###########################################################
#                       MODEL SETTING                     #
###########################################################
# This file (ecapa_tdnn.yaml) only supports the ECAPA-TDNN model.
# To use another model, choose another configuration YAML file.
model:
  # NOTE(review): same value as n_mels (80) in the feature extraction
  # settings; presumably the two must be kept equal - confirm.
  input_size: 80
  # channels, kernel_sizes and dilations each hold five entries.
  # NOTE(review): presumably one entry per layer, so the three lists must
  # stay the same length - confirm against the model code.
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 192

###########################################
#                Training                 #
###########################################
# Seed value taken from the SpeechBrain configuration.
seed: 1986
epochs: 10
save_interval: 10
log_interval: 10
# The two rates below are written with an explicit ".0" in the mantissa:
# YAML 1.1 loaders such as PyYAML resolve dot-less exponent forms
# (e.g. "1e-8") as strings rather than floats.
learning_rate: 1.0e-8
max_lr: 1.0e-3
step_size: 140000


###########################################
#                Loss                     #
###########################################
# NOTE(review): presumably the margin and scale factor of a margin-based
# classification loss - confirm against the training code.
margin: 0.2
scale: 30

###########################################
#                Testing                  #
###########################################
# Embedding normalization switches used at test time.
global_embedding_norm: true
embedding_mean_norm: true
embedding_std_norm: false

###########################################
#                Score-norm               #
###########################################
score_norm: s-norm
# Number of imposter utterances in the normalization cohort.
cohort_size: 20000
# Used for the normalization statistics.
n_train_snts: 400000
add some comments to the code 3 years ago			`###########################################`
			`# Data #`
			`###########################################`
change the code style to s2t code style, test=doc 3 years ago			`augment: True`
add vector csv dataset format, test=doc 3 years ago			`batch_size: 32`
add some comments to the code 3 years ago			`num_workers: 2`
add vector necessary note, test=doc 3 years ago			`num_speakers: 7205 # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41`
add some comments to the code 3 years ago			`shuffle: True`
convert rirs noise to csv file 3 years ago			`skip_prep: False`
convert jsonfile to csv file 3 years ago			`split_ratio: 0.9`
			`chunk_duration: 3.0 # seconds`
add some comments to the code 3 years ago			`random_chunk: True`
convert jsonfile to csv file 3 years ago			`verification_file: data/vox1/veri_test2.txt`
add some comments to the code 3 years ago
add ecapa-tdnn config yaml file 3 years ago			`###########################################################`
			`# FEATURE EXTRACTION SETTING #`
			`###########################################################`
			`# currently, we only support fbank`
add vector cli component, test=doc 3 years ago			`sr: 16000 # sample rate`
change the code style to s2t code style, test=doc 3 years ago			`n_mels: 80`
			`window_size: 400 #25ms, sample rate 16000, 25 * 16000 / 1000 = 400`
add vector cli component, test=doc 3 years ago			`hop_size: 160 #10ms, sample rate 16000, 10 * 16000 / 1000 = 160`
add ecapa-tdnn config yaml file 3 years ago
			`###########################################################`
			`# MODEL SETTING #`
			`###########################################################`
			`# currently, we only support ecapa-tdnn in the ecapa_tdnn.yaml`
			`# if we want use another model, please choose another configuration yaml file`
			`model:`
			`input_size: 80`
add some comments to the code 3 years ago			`channels: [1024, 1024, 1024, 1024, 3072]`
add ecapa-tdnn config yaml file 3 years ago			`kernel_sizes: [5, 3, 3, 3, 1]`
			`dilations: [1, 2, 3, 4, 1]`
			`attention_channels: 128`
			`lin_neurons: 192`

			`###########################################`
			`# Training #`
			`###########################################`
add some comments to the code 3 years ago			`seed: 1986 # according from speechbrain configuration`
add ecapa-tdnn config yaml file 3 years ago			`epochs: 10`
refactor voxceleb2 data download, test=doc 3 years ago			`save_interval: 10`
			`log_interval: 10`
add ecapa-tdnn config yaml file 3 years ago			`learning_rate: 1e-8`
add vector necessary note, test=doc 3 years ago			`max_lr: 1e-3`
			`step_size: 140000`
change the code style to s2t code style, test=doc 3 years ago

add vector necessary note, test=doc 3 years ago			`###########################################`
			`# loss #`
			`###########################################`
			`margin: 0.2`
			`scale: 30`

change the code style to s2t code style, test=doc 3 years ago			`###########################################`
			`# Testing #`
			`###########################################`
			`global_embedding_norm: True`
			`embedding_mean_norm: True`
			`embedding_std_norm: False`

update the vector model, test=doc 2 years ago			`###########################################`
			`# score-norm #`
			`###########################################`
			`score_norm: s-norm`
			`cohort_size: 20000 # amount of imposter utterances in normalization cohort`
			`n_train_snts: 400000 # used for normalization stats`