From 7a03f36548aae74964d273af91dc943cc9175a4a Mon Sep 17 00:00:00 2001
From: ccrrong <1039058843@qq.com>
Date: Tue, 5 Apr 2022 19:49:44 +0800
Subject: [PATCH] examples/ami/sd0: code cleanup and formatting, test=doc

---
 examples/ami/sd0/conf/ecapa_tdnn.yaml      | 11 +----
 examples/ami/sd0/local/compute_embdding.py |  3 +-
 examples/ami/sd0/local/data.sh             | 49 ----------------------
 examples/ami/sd0/local/experiment.py       | 37 ++++++----------
 examples/ami/sd0/local/process.sh          |  2 +-
 5 files changed, 16 insertions(+), 86 deletions(-)
 delete mode 100755 examples/ami/sd0/local/data.sh

diff --git a/examples/ami/sd0/conf/ecapa_tdnn.yaml b/examples/ami/sd0/conf/ecapa_tdnn.yaml
index 0f298c35..319e4497 100755
--- a/examples/ami/sd0/conf/ecapa_tdnn.yaml
+++ b/examples/ami/sd0/conf/ecapa_tdnn.yaml
@@ -1,13 +1,3 @@
-# ##################################################
-# Model: Speaker Diarization Baseline
-# Embeddings: Deep embedding
-# Clustering Technique: Spectral clustering
-# Authors: Nauman Dawalatabad 2020
-# #################################################
-
-seed: 1234
-num_speakers: 7205
-
 ###########################################################
 #                AMI DATA PREPARE SETTING               #
 ###########################################################
@@ -44,6 +34,7 @@ hop_size: 160        #10ms, sample rate 16000, 10 * 16000 / 1000 = 160
 ###########################################################
 # currently, we only support ecapa-tdnn in the ecapa_tdnn.yaml
 # if we want use another model, please choose another configuration yaml file
+seed: 1234
 emb_dim: 192
 batch_size: 16
 model:
diff --git a/examples/ami/sd0/local/compute_embdding.py b/examples/ami/sd0/local/compute_embdding.py
index e4fd5da2..30d49d51 100644
--- a/examples/ami/sd0/local/compute_embdding.py
+++ b/examples/ami/sd0/local/compute_embdding.py
@@ -94,7 +94,7 @@ def main(args, config):
 
     # stage2: build the speaker verification eval instance with backbone model
     model = SpeakerIdetification(
-        backbone=ecapa_tdnn, num_class=config.num_speakers)
+        backbone=ecapa_tdnn, num_class=1)
 
     # stage3: load the pre-trained model
     #         we get the last model from the epoch and save_interval
@@ -228,6 +228,5 @@ if __name__ == "__main__":
         config.merge_from_file(args.config)
 
     config.freeze()
-    print(config)
 
     main(args, config)
diff --git a/examples/ami/sd0/local/data.sh b/examples/ami/sd0/local/data.sh
deleted file mode 100755
index 478ec432..00000000
--- a/examples/ami/sd0/local/data.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-stage=1
-
-TARGET_DIR=${MAIN_ROOT}/dataset/ami
-data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/
-manual_annot_folder=${TARGET_DIR}/ami_public_manual_1.6.2 #e.g., /path/to/ami_public_manual_1.6.2/
-
-save_folder=${MAIN_ROOT}/examples/ami/sd0/data
-ref_rttm_dir=${save_folder}/ref_rttms
-meta_data_dir=${save_folder}/metadata
-
-set=L
-
-. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
-set -u
-set -o pipefail
-
-mkdir -p ${save_folder}
-
-if [ ${stage} -le 0 ]; then
-    # Download AMI corpus, You need around 10GB of free space to get whole data
-    # The signals are too large to package in this way,
-    # so you need to use the chooser to indicate which ones you wish to download
-    echo "Please follow https://groups.inf.ed.ac.uk/ami/download/ to download the data."
-    echo "Annotations: AMI manual annotations v1.6.2 "
-    echo "Signals: "
-    echo "1) Select one or more AMI meetings: the IDs please follow ./ami_split.py"
-    echo "2) Select media streams: Just select Headset mix"
-    exit 0;
-fi
-
-if [ ${stage} -le 1 ]; then
-    echo "AMI Data preparation"
-
-    python local/ami_prepare.py  --data_folder ${data_folder} \
-            --manual_annot_folder ${manual_annot_folder} \
-            --save_folder ${save_folder} --ref_rttm_dir ${ref_rttm_dir} \
-            --meta_data_dir ${meta_data_dir} 
-    
-    if [ $? -ne 0 ]; then
-        echo "Prepare AMI failed. Please check log message."
-        exit 1
-    fi
-            
-fi
-
-echo "AMI data preparation done."
-exit 0
diff --git a/examples/ami/sd0/local/experiment.py b/examples/ami/sd0/local/experiment.py
index e912a489..5bb406d1 100755
--- a/examples/ami/sd0/local/experiment.py
+++ b/examples/ami/sd0/local/experiment.py
@@ -1,22 +1,16 @@
-#!/usr/bin/python3
-"""This recipe implements diarization system using deep embedding extraction followed by spectral clustering.
-
-To run this recipe:
-> python experiment.py hparams/<your_hyperparams_file.yaml>
- e.g., python experiment.py hparams/ecapa_tdnn.yaml
-
-Condition: Oracle VAD (speech regions taken from the groundtruth).
-
-Note: There are multiple ways to write this recipe. We iterate over individual recordings.
- This approach is less GPU memory demanding and also makes code easy to understand.
-
-Citation: This recipe is based on the following paper,
- N. Dawalatabad, M. Ravanelli, F. Grondin, J. Thienpondt, B. Desplanques, H. Na,
- "ECAPA-TDNN Embeddings for Speaker Diarization," arXiv:2104.01466, 2021.
-
-Authors
- * Nauman Dawalatabad 2020
-"""
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import glob
 import json
@@ -81,10 +75,6 @@ def diarize_dataset(
     # diarizing different recordings in a dataset.
     for rec_id in tqdm(all_rec_ids):
         # this tag will be displayed in the log.
-        if rec_id == "IS1008a":
-            continue
-        if rec_id == "ES2011a":
-            continue
         tag = ("[" + str(split_type) + ": " + str(i) + "/" +
                str(len(all_rec_ids)) + "]")
         i = i + 1
@@ -434,6 +424,5 @@ if __name__ == "__main__":
         config.merge_from_file(args.config)
 
     config.freeze()
-    print(config)
 
     main(args, config)
diff --git a/examples/ami/sd0/local/process.sh b/examples/ami/sd0/local/process.sh
index 1b5ed5bd..72c58b10 100755
--- a/examples/ami/sd0/local/process.sh
+++ b/examples/ami/sd0/local/process.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-stage=2
+stage=0
 set=L
 
 . ${MAIN_ROOT}/utils/parse_options.sh || exit 1;