update the note, test=doc

4 years ago · a8244dc5b0
parent 38e4e9c893
commit a8244dc5b0
3 changed files with 34 additions and 2 deletions
--- a/examples/voxceleb/sv0/local/data.sh
+++ b/examples/voxceleb/sv0/local/data.sh
@ -85,7 +85,7 @@ fi

 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
   # generate the vox2 manifest file from wav file
-   # we will generate the manifest.vox2 in ${dir}/vox2 directory
+   # we will generate the ${dir}/vox2/manifest.vox2
   # because we train on the whole vox2 dataset, collect all the vox2 data in one file
   echo "start generate the vox2 manifest files"
   python3 ${TARGET_DIR}/voxceleb/voxceleb2.py \
--- a/paddlespeech/vector/exps/ecapa_tdnn/train.py
+++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py
@ -37,7 +37,6 @@ from paddlespeech.vector.modules.sid_model import SpeakerIdetification
 from paddlespeech.vector.training.scheduler import CyclicLRScheduler
 from paddlespeech.vector.training.seeding import seed_everything
 from paddlespeech.vector.utils.time import Timer
-# from paddleaudio.datasets.voxceleb import VoxCeleb

 logger = Log(__name__).getlog()

--- a/paddlespeech/vector/io/batch.py
+++ b/paddlespeech/vector/io/batch.py
@ -17,6 +17,17 @@ import paddle


 def waveform_collate_fn(batch):
+    """Collate a list of waveform items into a batch
+
+    Args:
+        batch (list): the waveform list from the dataloader;
+                      each item contains several fields:
+                      feat: the utterance waveform data
+                      label: the utterance label encoding data
+
+    Returns:
+        dict: the batched data for the dataloader
+    """
    waveforms = np.stack([item['feat'] for item in batch])
    labels = np.stack([item['label'] for item in batch])

@ -27,6 +38,18 @@ def feature_normalize(feats: paddle.Tensor,
                      mean_norm: bool=True,
                      std_norm: bool=True,
                      convert_to_numpy: bool=False):
+    """Normalize the features of one utterance
+
+    Args:
+        feats (paddle.Tensor): the original utterance feat, such as fbank, mfcc
+        mean_norm (bool, optional): mean norm flag. Defaults to True.
+        std_norm (bool, optional): std norm flag. Defaults to True.
+        convert_to_numpy (bool, optional): convert the paddle.Tensor to numpy
+                                           and do the feature normalization with numpy. Defaults to False.
+
+    Returns:
+        paddle.Tensor: the normalized feats
+    """
    # Features normalization if needed
     # numpy.mean differs slightly from paddle.mean, by about 1e-6
    if convert_to_numpy:
@ -60,6 +83,16 @@ def pad_right_2d(x, target_length, axis=-1, mode='constant', **kwargs):


 def batch_feature_normalize(batch, mean_norm: bool=True, std_norm: bool=True):
+    """Normalize the features of a batch of utterances
+
+    Args:
+        batch (list): the batch feature from dataloader
+        mean_norm (bool, optional): mean normalization flag. Defaults to True.
+        std_norm (bool, optional): std normalization flag. Defaults to True.
+
+    Returns:
+        dict: the normalized batch features
+    """
    ids = [item['utt_id'] for item in batch]
    lengths = np.asarray([item['feat'].shape[1] for item in batch])
    feats = list(