From 8e5c2eb9697e4b23d9283b6d885239423e5c65d3 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sun, 8 Oct 2017 05:21:44 -0700 Subject: [PATCH] Update by following reviewer's comments for pull request #355. --- data_utils/audio.py | 20 +++++++++++++++++++- data_utils/utility.py | 6 +----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 895a7899..01c06484 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -118,7 +118,25 @@ class AudioSegment(object): @classmethod def from_sequence_file(cls, filepath): - """Create audio segment from sequence file. + """Create audio segment from sequence file. A sequence file is a binary + file containing a collection of multiple audio files, with several + header bytes at the beginning indicating the offset of each audio byte + data chunk. + + The format is: + + 4 bytes (int, version), + 4 bytes (int, num_utterance), + 4 bytes (int, bytes_per_header), + [bytes_per_header*(num_utterance+1)] bytes (offsets for each audio), + audio_bytes_data_of_1st_utterance, + audio_bytes_data_of_2nd_utterance, + ...... + + The sequence file name must end with ".seqbin", and the filename of the + 5th utterance's audio file in sequence file "xxx.seqbin" must be + "xxx.seqbin_5", with "5" indicating the utterance index within this + sequence file (starting from 1). :param filepath: Filepath of sequence file. :type filepath: basestring diff --git a/data_utils/utility.py b/data_utils/utility.py index 123348cb..96df2485 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -148,11 +148,7 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): w.start() # get results - sample = out_queue.get() - while not isinstance(sample, XmapEndSignal): - yield sample - sample = out_queue.get() - finish = 1 + finish = 0 while finish < process_num: sample = out_queue.get() if isinstance(sample, XmapEndSignal):