@ -11,7 +11,6 @@ import multiprocessing
import numpy as np
import numpy as np
import paddle . v2 as paddle
import paddle . v2 as paddle
from threading import local
from threading import local
import atexit
from data_utils . utility import read_manifest
from data_utils . utility import read_manifest
from data_utils . utility import xmap_readers_mp
from data_utils . utility import xmap_readers_mp
from data_utils . augmentor . augmentation import AugmentationPipeline
from data_utils . augmentor . augmentation import AugmentationPipeline
@ -194,15 +193,18 @@ class DataGenerator(object):
raise ValueError ( " Unknown shuffle method %s . " %
raise ValueError ( " Unknown shuffle method %s . " %
shuffle_method )
shuffle_method )
# prepare batches
# prepare batches
instance_reader = self . _instance_reader_creator ( manifest )
instance_reader , cleanup = self . _instance_reader_creator ( manifest )
batch = [ ]
batch = [ ]
for instance in instance_reader ( ) :
try :
batch . append ( instance )
for instance in instance_reader ( ) :
if len ( batch ) == batch_size :
batch . append ( instance )
if len ( batch ) == batch_size :
yield self . _padding_batch ( batch , padding_to , flatten )
batch = [ ]
if len ( batch ) > = min_batch_size :
yield self . _padding_batch ( batch , padding_to , flatten )
yield self . _padding_batch ( batch , padding_to , flatten )
batch = [ ]
finally :
if len ( batch ) > = min_batch_size :
cleanup ( )
yield self . _padding_batch ( batch , padding_to , flatten )
self . _epoch + = 1
self . _epoch + = 1
return batch_reader
return batch_reader
@ -280,10 +282,7 @@ class DataGenerator(object):
lambda instance : self . process_utterance ( instance [ " audio_filepath " ] , instance [ " text " ] ) ,
lambda instance : self . process_utterance ( instance [ " audio_filepath " ] , instance [ " text " ] ) ,
reader , self . _num_threads , 4096 )
reader , self . _num_threads , 4096 )
# register callback to main process
return reader , cleanup_callback
atexit . register ( cleanup_callback )
return reader
def _padding_batch ( self , batch , padding_to = - 1 , flatten = False ) :
def _padding_batch ( self , batch , padding_to = - 1 , flatten = False ) :
"""
"""