update the note, test=doc

pull/1630/head
xiongxinlei 3 years ago
parent 38e4e9c893
commit a8244dc5b0

@@ -85,7 +85,7 @@ fi
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # generate the vox2 manifest file from wav file
-    # we will generate the manifest.vox2 in ${dir}/vox2 directory
+    # we will generate the ${dir}/vox2/manifest.vox2
     # because we use all the vox2 dataset to train, so collect all the vox2 data in one file
     echo "start generate the vox2 manifest files"
     python3 ${TARGET_DIR}/voxceleb/voxceleb2.py \
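The point of this stage is to end up with one manifest covering every vox2 utterance. A minimal sketch of that "collect everything into one file" step, assuming the per-subset manifests are plain JSON-lines files (the glob pattern, paths, and field layout below are illustrative, not the actual output of voxceleb2.py):

import glob
import json

def merge_manifests(pattern, out_path):
    """Concatenate several JSON-lines manifests into a single manifest file."""
    with open(out_path, "w") as out_f:
        for manifest in sorted(glob.glob(pattern)):
            with open(manifest) as in_f:
                for line in in_f:
                    record = json.loads(line)          # one utterance per line
                    out_f.write(json.dumps(record) + "\n")

# e.g. merge_manifests("data/vox2/manifest.vox2.*", "data/vox2/manifest.vox2")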

@@ -37,7 +37,6 @@ from paddlespeech.vector.modules.sid_model import SpeakerIdetification
 from paddlespeech.vector.training.scheduler import CyclicLRScheduler
 from paddlespeech.vector.training.seeding import seed_everything
 from paddlespeech.vector.utils.time import Timer
-# from paddleaudio.datasets.voxceleb import VoxCeleb
 logger = Log(__name__).getlog()

@@ -17,6 +17,17 @@ import paddle
 def waveform_collate_fn(batch):
+    """Wrap the waveforms into a batch.
+
+    Args:
+        batch (list): the waveform list from the dataloader;
+            each item contains several fields:
+                feat: the utterance waveform data
+                label: the utterance label encoding data
+
+    Returns:
+        dict: the batch data for the dataloader
+    """
     waveforms = np.stack([item['feat'] for item in batch])
     labels = np.stack([item['label'] for item in batch])
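For reference, a self-contained sketch of the collate step this docstring describes, assuming each dataset item is a dict with 'feat' (the waveform) and 'label' (the encoded speaker label); the returned key names here are illustrative rather than taken from the library:

import numpy as np

def waveform_collate_fn_sketch(batch):
    # stack per-utterance waveforms and labels into batch arrays
    waveforms = np.stack([item['feat'] for item in batch])
    labels = np.stack([item['label'] for item in batch])
    return {'waveforms': waveforms, 'labels': labels}

# toy batch: two 1-second utterances at 16 kHz
batch = [{'feat': np.zeros(16000, dtype='float32'), 'label': 0},
         {'feat': np.ones(16000, dtype='float32'), 'label': 1}]
out = waveform_collate_fn_sketch(batch)
print(out['waveforms'].shape, out['labels'])   # (2, 16000) [0 1]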
@@ -27,6 +38,18 @@ def feature_normalize(feats: paddle.Tensor,
                       mean_norm: bool=True,
                       std_norm: bool=True,
                       convert_to_numpy: bool=False):
+    """Do one utterance feature normalization.
+
+    Args:
+        feats (paddle.Tensor): the original utterance feature, such as fbank or mfcc
+        mean_norm (bool, optional): mean norm flag. Defaults to True.
+        std_norm (bool, optional): std norm flag. Defaults to True.
+        convert_to_numpy (bool, optional): convert the paddle.Tensor to numpy
+            and do the feature norm with numpy. Defaults to False.
+
+    Returns:
+        paddle.Tensor: the normalized feats
+    """
     # Features normalization if needed
     # numpy.mean differs slightly from paddle.mean (about 1e-6)
     if convert_to_numpy:
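To make the normalization concrete, here is a minimal numpy-only sketch of per-utterance mean/std normalization. The (feat_dim, num_frames) layout and the choice to normalize over the time axis are assumptions made for illustration:

import numpy as np

def feature_normalize_sketch(feats, mean_norm=True, std_norm=True):
    # feats: (feat_dim, num_frames) array for a single utterance (assumed layout)
    mean = feats.mean(axis=-1, keepdims=True) if mean_norm else 0.0
    std = feats.std(axis=-1, keepdims=True) if std_norm else 1.0
    # numpy and paddle reductions can disagree by roughly 1e-6 in float32,
    # which is why the real code exposes a convert_to_numpy switch
    return (feats - mean) / std

feats = np.random.rand(80, 200).astype('float32')   # e.g. an 80-dim fbank
normed = feature_normalize_sketch(feats)
print(normed.mean(axis=-1)[:3], normed.std(axis=-1)[:3])   # ~0 and ~1 per dim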
@@ -60,6 +83,16 @@ def pad_right_2d(x, target_length, axis=-1, mode='constant', **kwargs):
 def batch_feature_normalize(batch, mean_norm: bool=True, std_norm: bool=True):
+    """Do batch utterance features normalization.
+
+    Args:
+        batch (list): the batch features from the dataloader
+        mean_norm (bool, optional): mean normalization flag. Defaults to True.
+        std_norm (bool, optional): std normalization flag. Defaults to True.
+
+    Returns:
+        dict: the normalized batch features
+    """
     ids = [item['utt_id'] for item in batch]
     lengths = np.asarray([item['feat'].shape[1] for item in batch])
     feats = list(
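Finally, a sketch of the batch-level version: normalize each 2-D feat, right-pad it to the longest utterance in the batch, and return an ids/feats/lengths dict. The padding helper and the relative-length convention below are assumptions for illustration, not necessarily what pad_right_2d or the real return value does:

import numpy as np

def batch_feature_normalize_sketch(batch, mean_norm=True, std_norm=True):
    ids = [item['utt_id'] for item in batch]
    lengths = np.asarray([item['feat'].shape[1] for item in batch])
    max_len = lengths.max()

    feats = []
    for item in batch:
        feat = item['feat']                        # (feat_dim, num_frames), assumed layout
        mean = feat.mean(axis=-1, keepdims=True) if mean_norm else 0.0
        std = feat.std(axis=-1, keepdims=True) if std_norm else 1.0
        feat = (feat - mean) / std
        pad = max_len - feat.shape[1]
        feats.append(np.pad(feat, ((0, 0), (0, pad))))   # right-pad the time axis

    return {'ids': ids,
            'feats': np.stack(feats),
            'lengths': lengths / max_len}          # relative lengths (assumption)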
