From 7a03f36548aae74964d273af91dc943cc9175a4a Mon Sep 17 00:00:00 2001 From: ccrrong <1039058843@qq.com> Date: Tue, 5 Apr 2022 19:49:44 +0800 Subject: [PATCH] code format, test=doc --- examples/ami/sd0/conf/ecapa_tdnn.yaml | 11 +---- examples/ami/sd0/local/compute_embdding.py | 3 +- examples/ami/sd0/local/data.sh | 49 ---------------------- examples/ami/sd0/local/experiment.py | 37 ++++++---------- examples/ami/sd0/local/process.sh | 2 +- 5 files changed, 16 insertions(+), 86 deletions(-) delete mode 100755 examples/ami/sd0/local/data.sh diff --git a/examples/ami/sd0/conf/ecapa_tdnn.yaml b/examples/ami/sd0/conf/ecapa_tdnn.yaml index 0f298c35..319e4497 100755 --- a/examples/ami/sd0/conf/ecapa_tdnn.yaml +++ b/examples/ami/sd0/conf/ecapa_tdnn.yaml @@ -1,13 +1,3 @@ -# ################################################## -# Model: Speaker Diarization Baseline -# Embeddings: Deep embedding -# Clustering Technique: Spectral clustering -# Authors: Nauman Dawalatabad 2020 -# ################################################# - -seed: 1234 -num_speakers: 7205 - ########################################################### # AMI DATA PREPARE SETTING # ########################################################### @@ -44,6 +34,7 @@ hop_size: 160 #10ms, sample rate 16000, 10 * 16000 / 1000 = 160 ########################################################### # currently, we only support ecapa-tdnn in the ecapa_tdnn.yaml # if we want use another model, please choose another configuration yaml file +seed: 1234 emb_dim: 192 batch_size: 16 model: diff --git a/examples/ami/sd0/local/compute_embdding.py b/examples/ami/sd0/local/compute_embdding.py index e4fd5da2..30d49d51 100644 --- a/examples/ami/sd0/local/compute_embdding.py +++ b/examples/ami/sd0/local/compute_embdding.py @@ -94,7 +94,7 @@ def main(args, config): # stage2: build the speaker verification eval instance with backbone model model = SpeakerIdetification( - backbone=ecapa_tdnn, num_class=config.num_speakers) + backbone=ecapa_tdnn, num_class=1) # stage3: load the pre-trained model # we get the last model from the epoch and save_interval @@ -228,6 +228,5 @@ if __name__ == "__main__": config.merge_from_file(args.config) config.freeze() - print(config) main(args, config) diff --git a/examples/ami/sd0/local/data.sh b/examples/ami/sd0/local/data.sh deleted file mode 100755 index 478ec432..00000000 --- a/examples/ami/sd0/local/data.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -stage=1 - -TARGET_DIR=${MAIN_ROOT}/dataset/ami -data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/ -manual_annot_folder=${TARGET_DIR}/ami_public_manual_1.6.2 #e.g., /path/to/ami_public_manual_1.6.2/ - -save_folder=${MAIN_ROOT}/examples/ami/sd0/data -ref_rttm_dir=${save_folder}/ref_rttms -meta_data_dir=${save_folder}/metadata - -set=L - -. ${MAIN_ROOT}/utils/parse_options.sh || exit 1; -set -u -set -o pipefail - -mkdir -p ${save_folder} - -if [ ${stage} -le 0 ]; then - # Download AMI corpus, You need around 10GB of free space to get whole data - # The signals are too large to package in this way, - # so you need to use the chooser to indicate which ones you wish to download - echo "Please follow https://groups.inf.ed.ac.uk/ami/download/ to download the data." - echo "Annotations: AMI manual annotations v1.6.2 " - echo "Signals: " - echo "1) Select one or more AMI meetings: the IDs please follow ./ami_split.py" - echo "2) Select media streams: Just select Headset mix" - exit 0; -fi - -if [ ${stage} -le 1 ]; then - echo "AMI Data preparation" - - python local/ami_prepare.py --data_folder ${data_folder} \ - --manual_annot_folder ${manual_annot_folder} \ - --save_folder ${save_folder} --ref_rttm_dir ${ref_rttm_dir} \ - --meta_data_dir ${meta_data_dir} - - if [ $? -ne 0 ]; then - echo "Prepare AMI failed. Please check log message." - exit 1 - fi - -fi - -echo "AMI data preparation done." -exit 0 diff --git a/examples/ami/sd0/local/experiment.py b/examples/ami/sd0/local/experiment.py index e912a489..5bb406d1 100755 --- a/examples/ami/sd0/local/experiment.py +++ b/examples/ami/sd0/local/experiment.py @@ -1,22 +1,16 @@ -#!/usr/bin/python3 -"""This recipe implements diarization system using deep embedding extraction followed by spectral clustering. - -To run this recipe: -> python experiment.py hparams/ - e.g., python experiment.py hparams/ecapa_tdnn.yaml - -Condition: Oracle VAD (speech regions taken from the groundtruth). - -Note: There are multiple ways to write this recipe. We iterate over individual recordings. - This approach is less GPU memory demanding and also makes code easy to understand. - -Citation: This recipe is based on the following paper, - N. Dawalatabad, M. Ravanelli, F. Grondin, J. Thienpondt, B. Desplanques, H. Na, - "ECAPA-TDNN Embeddings for Speaker Diarization," arXiv:2104.01466, 2021. - -Authors - * Nauman Dawalatabad 2020 -""" +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import glob import json @@ -81,10 +75,6 @@ def diarize_dataset( # diarizing different recordings in a dataset. for rec_id in tqdm(all_rec_ids): # this tag will be displayed in the log. - if rec_id == "IS1008a": - continue - if rec_id == "ES2011a": - continue tag = ("[" + str(split_type) + ": " + str(i) + "/" + str(len(all_rec_ids)) + "]") i = i + 1 @@ -434,6 +424,5 @@ if __name__ == "__main__": config.merge_from_file(args.config) config.freeze() - print(config) main(args, config) diff --git a/examples/ami/sd0/local/process.sh b/examples/ami/sd0/local/process.sh index 1b5ed5bd..72c58b10 100755 --- a/examples/ami/sd0/local/process.sh +++ b/examples/ami/sd0/local/process.sh @@ -1,6 +1,6 @@ #!/bin/bash -stage=2 +stage=0 set=L . ${MAIN_ROOT}/utils/parse_options.sh || exit 1;