You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

57 lines
2.0 KiB

# NOTE(review): the path after "./" appears to have been truncated; sourcing a
# directory fails. Presumably this should source an environment file that
# defines BIN_DIR (used in stage 1) -- restore the real filename.
. ./
set -e

# Staged recipe. A stage runs when ${stage} is less than or equal to its
# number, so stage=0 runs everything and stage=2 skips stages 0 and 1.
#   stage 0: data preparation, including the voxceleb1 download and generation
#            of {train,dev,enroll,test}.csv. voxceleb2 ships as m4a, so users
#            must convert the m4a files to wav themselves beforehand.
#   stage 1: train the speaker identification model
#   stage 2: test speaker identification (cosine scoring)
#   stage 3: extract an audio embedding
#
# Set PPAUDIO_HOME to specify where the downloaded vox1 and vox2 datasets
# live; by default the dataset directory is ~/.paddleaudio/
# export PPAUDIO_HOME=

# Keep any value of `stage` that is already set (environment or the sourced
# file); otherwise start from the first stage. Without a value the numeric
# tests below would fail with a "unary operator expected" style error.
stage=${stage:-0}

dir=data.bak/            # data directory
exp_dir=exp/ecapa-tdnn/  # experiment directory
mkdir -p "${dir}"
mkdir -p "${exp_dir}"

if [ "${stage}" -le 0 ]; then
  # stage 0: data preparation for vox1 and vox2; vox2 must already be
  # converted from m4a to wav.
  # NOTE(review): the script name after "local/" appears truncated -- confirm.
  python3 local/ --data-dir "${dir}" --augment
fi

if [ "${stage}" -le 1 ]; then
  # stage 1: train the speaker identification model on GPUs 0-3.
  # NOTE(review): the script name after "${BIN_DIR}/" appears truncated -- confirm.
  python3 \
    -m paddle.distributed.launch --gpus=0,1,2,3 \
    "${BIN_DIR}/" --device "gpu" --checkpoint-dir "${exp_dir}" --augment \
    --data-dir "${dir}" --config conf/ecapa_tdnn.yaml
fi

if [ "${stage}" -le 2 ]; then
  # stage 2: get the speaker verification scores with the cosine function.
  # NOTE(review): no script path follows python3 here; it appears to have
  # been lost -- restore it before running.
  # NOTE(review): this loads epoch_10 while stage 3 loads epoch_60 -- confirm
  # which checkpoint is intended.
  python3 \
    --config conf/ecapa_tdnn.yaml \
    --data-dir "${dir}" --load-checkpoint "${exp_dir}/epoch_10/"
fi

if [ "${stage}" -le 3 ]; then
  # stage 3: extract the audio embedding for a single demo wav.
  # NOTE(review): no script path follows python3 here; it appears to have
  # been lost -- restore it before running.
  python3 \
    --config conf/ecapa_tdnn.yaml \
    --audio-path "demo/csv/00001.wav" --load-checkpoint "${exp_dir}/epoch_60/"
fi
# if [ $stage -le 3 ]; then
# # stage 3: extract the training embedding to train the LDA and PLDA
# # todo: extract the training embedding
# fi