code format, test=doc

2 years ago · 7a03f36548
parent 378fe5909f
commit 7a03f36548
5 changed files with 16 additions and 86 deletions
--- a/examples/ami/sd0/conf/ecapa_tdnn.yaml
+++ b/examples/ami/sd0/conf/ecapa_tdnn.yaml
@ -1,13 +1,3 @@
-# ##################################################
-# Model: Speaker Diarization Baseline
-# Embeddings: Deep embedding
-# Clustering Technique: Spectral clustering
-# Authors: Nauman Dawalatabad 2020
-# #################################################
-
-seed: 1234
-num_speakers: 7205
-
 ###########################################################
 #                AMI DATA PREPARE SETTING               #
 ###########################################################
@ -44,6 +34,7 @@ hop_size: 160        #10ms, sample rate 16000, 10 * 16000 / 1000 = 160
 ###########################################################
 # Currently, ecapa_tdnn.yaml only supports the ecapa-tdnn model.
 # To use another model, please choose another configuration yaml file.
+seed: 1234
 emb_dim: 192
 batch_size: 16
 model:
--- a/examples/ami/sd0/local/compute_embdding.py
+++ b/examples/ami/sd0/local/compute_embdding.py
@ -94,7 +94,7 @@ def main(args, config):

    # stage2: build the speaker verification eval instance with backbone model
    model = SpeakerIdetification(
-        backbone=ecapa_tdnn, num_class=config.num_speakers)
+        backbone=ecapa_tdnn, num_class=1)

    # stage3: load the pre-trained model
    #         we get the last model from the epoch and save_interval
@ -228,6 +228,5 @@ if __name__ == "__main__":
        config.merge_from_file(args.config)

    config.freeze()
-    print(config)

    main(args, config)
--- a/examples/ami/sd0/local/data.sh
+++ b/examples/ami/sd0/local/data.sh
@ -1,49 +0,0 @@
-#!/bin/bash
-
-stage=1
-
-TARGET_DIR=${MAIN_ROOT}/dataset/ami
-data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/
-manual_annot_folder=${TARGET_DIR}/ami_public_manual_1.6.2 #e.g., /path/to/ami_public_manual_1.6.2/
-
-save_folder=${MAIN_ROOT}/examples/ami/sd0/data
-ref_rttm_dir=${save_folder}/ref_rttms
-meta_data_dir=${save_folder}/metadata
-
-set=L
-
-. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
-set -u
-set -o pipefail
-
-mkdir -p ${save_folder}
-
-if [ ${stage} -le 0 ]; then
-    # Download AMI corpus, You need around 10GB of free space to get whole data
-    # The signals are too large to package in this way,
-    # so you need to use the chooser to indicate which ones you wish to download
-    echo "Please follow https://groups.inf.ed.ac.uk/ami/download/ to download the data."
-    echo "Annotations: AMI manual annotations v1.6.2 "
-    echo "Signals: "
-    echo "1) Select one or more AMI meetings: the IDs please follow ./ami_split.py"
-    echo "2) Select media streams: Just select Headset mix"
-    exit 0;
-fi
-
-if [ ${stage} -le 1 ]; then
-    echo "AMI Data preparation"
-
-    python local/ami_prepare.py  --data_folder ${data_folder} \
-            --manual_annot_folder ${manual_annot_folder} \
-            --save_folder ${save_folder} --ref_rttm_dir ${ref_rttm_dir} \
-            --meta_data_dir ${meta_data_dir} 
-    
-    if [ $? -ne 0 ]; then
-        echo "Prepare AMI failed. Please check log message."
-        exit 1
-    fi
-            
-fi
-
-echo "AMI data preparation done."
-exit 0
--- a/examples/ami/sd0/local/experiment.py
+++ b/examples/ami/sd0/local/experiment.py
@ -1,22 +1,16 @@
-#!/usr/bin/python3
-"""This recipe implements diarization system using deep embedding extraction followed by spectral clustering.
-
-To run this recipe:
-> python experiment.py hparams/<your_hyperparams_file.yaml>
- e.g., python experiment.py hparams/ecapa_tdnn.yaml
-
-Condition: Oracle VAD (speech regions taken from the groundtruth).
-
-Note: There are multiple ways to write this recipe. We iterate over individual recordings.
- This approach is less GPU memory demanding and also makes code easy to understand.
-
-Citation: This recipe is based on the following paper,
- N. Dawalatabad, M. Ravanelli, F. Grondin, J. Thienpondt, B. Desplanques, H. Na,
- "ECAPA-TDNN Embeddings for Speaker Diarization," arXiv:2104.01466, 2021.
-
-Authors
- * Nauman Dawalatabad 2020
-"""
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import glob
 import json
@ -81,10 +75,6 @@ def diarize_dataset(
    # diarizing different recordings in a dataset.
    for rec_id in tqdm(all_rec_ids):
        # this tag will be displayed in the log.
-        if rec_id == "IS1008a":
-            continue
-        if rec_id == "ES2011a":
-            continue
        tag = ("[" + str(split_type) + ": " + str(i) + "/" +
               str(len(all_rec_ids)) + "]")
        i = i + 1
@ -434,6 +424,5 @@ if __name__ == "__main__":
        config.merge_from_file(args.config)

    config.freeze()
-    print(config)

    main(args, config)
--- a/examples/ami/sd0/local/process.sh
+++ b/examples/ami/sd0/local/process.sh
@ -1,6 +1,6 @@
 #!/bin/bash

-stage=2
+stage=0
 set=L

 . ${MAIN_ROOT}/utils/parse_options.sh || exit 1;