PaddleSpeech/examples/voxceleb/sv0/run.sh

#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

. ./path.sh
set -e

#######################################################################
# stage 0: data prepare, including voxceleb1 download and generate {train,dev,enroll,test}.csv
#          voxceleb2 data is m4a format, so we need user to convert the m4a to wav yourselves as described in Readme.md with the script local/convert.sh
# stage 1: train the speaker identification model
# stage 2: test speaker identification 
# stage 3: extract the training embeding to train the LDA and PLDA
######################################################################

# we can set the variable PPAUDIO_HOME to specifiy the root directory of the downloaded vox1 and vox2 dataset 
# default the dataset will be stored in the ~/.paddleaudio/
# the vox2 dataset is stored in m4a format, we need to convert the audio from m4a to wav yourself
# and put all of them to ${PPAUDIO_HOME}/datasets/vox2
# we will find the wav from ${PPAUDIO_HOME}/datasets/vox1/wav and ${PPAUDIO_HOME}/datasets/vox2/wav
# export PPAUDIO_HOME=
stage=0
stop_stage=50

# data directory
# if we set the variable ${dir}, we will store the wav info to this directory
# otherwise, we will store the wav info to vox1 and vox2 directory respectively
# vox2 wav path, we must convert the m4a format to wav format 
# dir=data-demo/                          # data info directory    
dir=demo/                          # data info directory   

exp_dir=exp/ecapa-tdnn-vox12-big//            # experiment directory
conf_path=conf/ecapa_tdnn.yaml          
gpus=0,1,2,3

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

mkdir -p ${exp_dir}

if [ $stage -le 0 ] && [ ${stop_stage} -ge 0 ]; then 
     # stage 0: data prepare for vox1 and vox2, vox2 must be converted from m4a to wav
     # and we should specifiy the vox2 data in the data.sh
     bash ./local/data.sh ${dir} ${conf_path}|| exit -1;
fi 

if [ $stage -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # stage 1: train the speaker identification model
     CUDA_VISIBLE_DEVICES=${gpus} bash ./local/train.sh ${dir} ${exp_dir} ${conf_path} 
fi

if [ $stage -le 2 ]; then
     # stage 2: get the speaker verification scores with cosine function
     #          now we only support use cosine to get the scores
     CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh ${dir} ${exp_dir} ${conf_path}
fi

# if [ $stage -le 3 ]; then
#      # stage 2: extract the training embeding to train the LDA and PLDA
#      # todo: extract the training embedding
# fi
add voxceleb1 data prepare 3 years ago			`#!/bin/bash`
add vox2 data into VoxCeleb class 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
merge develop to vox12, test=doc 3 years ago
add voxceleb1 data prepare 3 years ago			`. ./path.sh`
			`set -e`
repair the variable name bug 3 years ago
add speaker verification using cosine score, test=doc 3 years ago			`#######################################################################`
check extract embedding result, test=doc 3 years ago			`# stage 0: data prepare, including voxceleb1 download and generate {train,dev,enroll,test}.csv`
change the code style to s2t code style, test=doc 3 years ago			`# voxceleb2 data is m4a format, so we need user to convert the m4a to wav yourselves as described in Readme.md with the script local/convert.sh`
add speaker verification using cosine score, test=doc 3 years ago			`# stage 1: train the speaker identification model`
			`# stage 2: test speaker identification`
			`# stage 3: extract the training embeding to train the LDA and PLDA`
			`######################################################################`

add vox2 data into VoxCeleb class 3 years ago			`# we can set the variable PPAUDIO_HOME to specifiy the root directory of the downloaded vox1 and vox2 dataset`
			`# default the dataset will be stored in the ~/.paddleaudio/`
			`# the vox2 dataset is stored in m4a format, we need to convert the audio from m4a to wav yourself`
			`# and put all of them to ${PPAUDIO_HOME}/datasets/vox2`
			`# we will find the wav from ${PPAUDIO_HOME}/datasets/vox1/wav and ${PPAUDIO_HOME}/datasets/vox2/wav`
add speaker verification using cosine score, test=doc 3 years ago			`# export PPAUDIO_HOME=`
check extract embedding result, test=doc 3 years ago			`stage=0`
change the code style to s2t code style, test=doc 3 years ago			`stop_stage=50`

add vox2 data into VoxCeleb class 3 years ago			`# data directory`
			`# if we set the variable ${dir}, we will store the wav info to this directory`
			`# otherwise, we will store the wav info to vox1 and vox2 directory respectively`
			`# vox2 wav path, we must convert the m4a format to wav format`
change the code style to s2t code style, test=doc 3 years ago			`# dir=data-demo/ # data info directory`
			`dir=demo/ # data info directory`

			`exp_dir=exp/ecapa-tdnn-vox12-big// # experiment directory`
			`conf_path=conf/ecapa_tdnn.yaml`
			`gpus=0,1,2,3`

			`source ${MAIN_ROOT}/utils/parse_options.sh \|\| exit 1;`

check extract embedding result, test=doc 3 years ago			`mkdir -p ${exp_dir}`

change the code style to s2t code style, test=doc 3 years ago			`if [ $stage -le 0 ] && [ ${stop_stage} -ge 0 ]; then`
add state 0 to prepare the voxcele data and augment data 3 years ago			`# stage 0: data prepare for vox1 and vox2, vox2 must be converted from m4a to wav`
change the code style to s2t code style, test=doc 3 years ago			`# and we should specifiy the vox2 data in the data.sh`
			`bash ./local/data.sh ${dir} ${conf_path}\|\| exit -1;`
add state 0 to prepare the voxcele data and augment data 3 years ago			`fi`
add speaker verification using cosine score, test=doc 3 years ago
change the code style to s2t code style, test=doc 3 years ago			`if [ $stage -le 1 ] && [ ${stop_stage} -ge 1 ]; then`
add speaker verification using cosine score, test=doc 3 years ago			`# stage 1: train the speaker identification model`
change the code style to s2t code style, test=doc 3 years ago			`CUDA_VISIBLE_DEVICES=${gpus} bash ./local/train.sh ${dir} ${exp_dir} ${conf_path}`
add speaker verification using cosine score, test=doc 3 years ago			`fi`

			`if [ $stage -le 2 ]; then`
change the code style to s2t code style, test=doc 3 years ago			`# stage 2: get the speaker verification scores with cosine function`
			`# now we only support use cosine to get the scores`
			`CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh ${dir} ${exp_dir} ${conf_path}`
add speaker verification using cosine score, test=doc 3 years ago			`fi`

check extract embedding result, test=doc 3 years ago			`# if [ $stage -le 3 ]; then`
			`# # stage 2: extract the training embeding to train the LDA and PLDA`
			`# # todo: extract the training embedding`
			`# fi`