code format, test=doc

pull/1651/head
ccrrong 2 years ago
parent 378fe5909f
commit 7a03f36548

@ -1,13 +1,3 @@
# ##################################################
# Model: Speaker Diarization Baseline
# Embeddings: Deep embedding
# Clustering Technique: Spectral clustering
# Authors: Nauman Dawalatabad 2020
# #################################################
seed: 1234
num_speakers: 7205
###########################################################
# AMI DATA PREPARE SETTING #
###########################################################
@ -44,6 +34,7 @@ hop_size: 160 #10ms, sample rate 16000, 10 * 16000 / 1000 = 160
###########################################################
# Currently, only ECAPA-TDNN is supported by ecapa_tdnn.yaml.
# To use another model, please choose another configuration YAML file.
seed: 1234
emb_dim: 192
batch_size: 16
model:

@ -94,7 +94,7 @@ def main(args, config):
# stage2: build the speaker verification eval instance with backbone model
model = SpeakerIdetification(
backbone=ecapa_tdnn, num_class=config.num_speakers)
backbone=ecapa_tdnn, num_class=1)
# stage3: load the pre-trained model
# we get the last model from the epoch and save_interval
@ -228,6 +228,5 @@ if __name__ == "__main__":
config.merge_from_file(args.config)
config.freeze()
print(config)
main(args, config)

@ -1,49 +0,0 @@
#!/bin/bash
# Prepare the AMI corpus data for the examples/ami/sd0 recipe.
#
# Stages (a stage runs when ${stage} is less than or equal to its number):
#   0 - print manual download instructions for the AMI corpus, then exit
#   1 - run local/ami_prepare.py on the downloaded signals and annotations
#
# With the default stage=1 the download instructions are skipped.
# NOTE(review): the variables assigned before parse_options.sh is sourced are
# presumably overridable from the command line (e.g. --stage 0) -- confirm
# against utils/parse_options.sh.

stage=1

TARGET_DIR=${MAIN_ROOT}/dataset/ami
data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/
manual_annot_folder=${TARGET_DIR}/ami_public_manual_1.6.2 #e.g., /path/to/ami_public_manual_1.6.2/

save_folder=${MAIN_ROOT}/examples/ami/sd0/data
ref_rttm_dir=${save_folder}/ref_rttms
meta_data_dir=${save_folder}/metadata

set=L

# Quote the path so a MAIN_ROOT containing spaces does not get word-split.
. "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Strict mode is enabled only after option parsing, as in the original script.
set -u
set -o pipefail

mkdir -p "${save_folder}"

if [ "${stage}" -le 0 ]; then
    # The AMI corpus must be downloaded manually; about 10GB of free space is
    # needed for the whole data set. The signals are too large to be packaged
    # together, so use the chooser on the download page to select the ones
    # you want.
    echo "Please follow https://groups.inf.ed.ac.uk/ami/download/ to download the data."
    echo "Annotations: AMI manual annotations v1.6.2 "
    echo "Signals: "
    echo "1) Select one or more AMI meetings: the IDs please follow ./ami_split.py"
    echo "2) Select media streams: Just select Headset mix"
    exit 0
fi

if [ "${stage}" -le 1 ]; then
    echo "AMI Data preparation"
    # Test the command directly instead of inspecting $? afterwards, so no
    # statement can slip in between the command and its status check.
    if ! python local/ami_prepare.py \
        --data_folder "${data_folder}" \
        --manual_annot_folder "${manual_annot_folder}" \
        --save_folder "${save_folder}" \
        --ref_rttm_dir "${ref_rttm_dir}" \
        --meta_data_dir "${meta_data_dir}"; then
        echo "Prepare AMI failed. Please check log message."
        exit 1
    fi
fi

echo "AMI data preparation done."
exit 0

@ -1,22 +1,16 @@
#!/usr/bin/python3
"""This recipe implements a diarization system using deep embedding extraction followed by spectral clustering.
To run this recipe:
> python experiment.py hparams/<your_hyperparams_file.yaml>
e.g., python experiment.py hparams/ecapa_tdnn.yaml
Condition: Oracle VAD (speech regions taken from the groundtruth).
Note: There are multiple ways to write this recipe. We iterate over individual recordings.
This approach demands less GPU memory and also makes the code easier to understand.
Citation: This recipe is based on the following paper,
N. Dawalatabad, M. Ravanelli, F. Grondin, J. Thienpondt, B. Desplanques, H. Na,
"ECAPA-TDNN Embeddings for Speaker Diarization," arXiv:2104.01466, 2021.
Authors
* Nauman Dawalatabad 2020
"""
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import json
@ -81,10 +75,6 @@ def diarize_dataset(
# diarizing different recordings in a dataset.
for rec_id in tqdm(all_rec_ids):
# this tag will be displayed in the log.
if rec_id == "IS1008a":
continue
if rec_id == "ES2011a":
continue
tag = ("[" + str(split_type) + ": " + str(i) + "/" +
str(len(all_rec_ids)) + "]")
i = i + 1
@ -434,6 +424,5 @@ if __name__ == "__main__":
config.merge_from_file(args.config)
config.freeze()
print(config)
main(args, config)

@ -1,6 +1,6 @@
#!/bin/bash
stage=2
stage=0
set=L
. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

Loading…
Cancel
Save