From a8244dc5b07dc9710f3a9ca773ab777c2361f551 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Thu, 7 Apr 2022 19:09:45 +0800 Subject: [PATCH] update the note, test=doc --- examples/voxceleb/sv0/local/data.sh | 2 +- paddlespeech/vector/exps/ecapa_tdnn/train.py | 1 - paddlespeech/vector/io/batch.py | 33 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/examples/voxceleb/sv0/local/data.sh b/examples/voxceleb/sv0/local/data.sh index da44d431..d6010ec6 100755 --- a/examples/voxceleb/sv0/local/data.sh +++ b/examples/voxceleb/sv0/local/data.sh @@ -85,7 +85,7 @@ fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # generate the vox2 manifest file from wav file - # we will generate the manifest.vox2 in ${dir}/vox2 directory + # we will generate the ${dir}/vox2/manifest.vox2 # because we use all the vox2 dataset to train, so collect all the vox2 data in one file echo "start generate the vox2 manifest files" python3 ${TARGET_DIR}/voxceleb/voxceleb2.py \ diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index 7ff6cb69..c1590c8f 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -37,7 +37,6 @@ from paddlespeech.vector.modules.sid_model import SpeakerIdetification from paddlespeech.vector.training.scheduler import CyclicLRScheduler from paddlespeech.vector.training.seeding import seed_everything from paddlespeech.vector.utils.time import Timer -# from paddleaudio.datasets.voxceleb import VoxCeleb logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/batch.py b/paddlespeech/vector/io/batch.py index b85563e7..5049d194 100644 --- a/paddlespeech/vector/io/batch.py +++ b/paddlespeech/vector/io/batch.py @@ -17,6 +17,17 @@ import paddle def waveform_collate_fn(batch): + """Wrap the waveform into a batch form + + Args: + batch (list): the waveform list from the dataloader + each item of data includes several fields + feat: the utterance waveform data + label: the utterance label encoding data + + Returns: + dict: the batch data for the dataloader + """ waveforms = np.stack([item['feat'] for item in batch]) labels = np.stack([item['label'] for item in batch]) @@ -27,6 +38,18 @@ def feature_normalize(feats: paddle.Tensor, mean_norm: bool=True, std_norm: bool=True, convert_to_numpy: bool=False): + """Do one utterance feature normalization + + Args: + feats (paddle.Tensor): the original utterance feat, such as fbank, mfcc + mean_norm (bool, optional): mean norm flag. Defaults to True. + std_norm (bool, optional): std norm flag. Defaults to True. + convert_to_numpy (bool, optional): convert the paddle.Tensor to numpy + and do feature norm with numpy. Defaults to False. + + Returns: + paddle.Tensor: the normalized feats + """ # Features normalization if needed # numpy.mean differs from paddle.mean by about 1e-6 if convert_to_numpy: @@ -60,6 +83,16 @@ def pad_right_2d(x, target_length, axis=-1, mode='constant', **kwargs): def batch_feature_normalize(batch, mean_norm: bool=True, std_norm: bool=True): + """Do batch utterance features normalization + + Args: + batch (list): the batch feature from dataloader + mean_norm (bool, optional): mean normalization flag. Defaults to True. + std_norm (bool, optional): std normalization flag. Defaults to True. + + Returns: + dict: the normalized batch features + """ ids = [item['utt_id'] for item in batch] lengths = np.asarray([item['feat'].shape[1] for item in batch]) feats = list(