From d1ee10be102263da5fbfac1e131c31ed605b5ad0 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Fri, 16 Jun 2017 18:29:56 +0800 Subject: [PATCH] modify audio and speech --- data_utils/audio.py | 14 ++++++++------ data_utils/speech.py | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index ee4e6d84..066437dc 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -104,7 +104,8 @@ class AudioSegment(object): io.BytesIO(bytes), dtype='float32') return cls(samples, sample_rate) - def concatenate(self, *segments): + @classmethod + def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. :param *segments: Input audio segments @@ -123,11 +124,11 @@ class AudioSegment(object): if sample_rate != seg._sample_rate: raise ValueError("Can't concatenate segments with " "different sample rates") - if type(seg) is not type(self): + if type(seg) is not cls: raise TypeError("Only audio segments of the same type " "instance can be concatenated.") samples = np.concatenate([seg.samples for seg in segments]) - return type(self)(samples, sample_rate) + return cls(samples, sample_rate) def to_wav_file(self, filepath, dtype='float32'): """Save audio segment to disk as wav file. @@ -355,13 +356,14 @@ class AudioSegment(object): """ if duration == 0.0: return self + cls = type(self) silence = self.make_silence(duration, self._sample_rate) if sides == "beginning": - padded = self.concatenate(silence, self) + padded = cls.concatenate(silence, self) elif sides == "end": - padded = self.concatenate(self, silence) + padded = cls.concatenate(self, silence) elif sides == "both": - padded = self.concatenate(silence, self, silence) + padded = cls.concatenate(silence, self, silence) else: raise ValueError("Unknown value for the kwarg %s" % sides) self._samples = padded._samples diff --git a/data_utils/speech.py b/data_utils/speech.py index 48db595b..5d1fc15a 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -65,6 +65,32 @@ class SpeechSegment(AudioSegment): audio = AudioSegment.from_bytes(bytes) return cls(audio.samples, audio.sample_rate, transcript) + @classmethod + def concatenate(cls, *segments): + """Concatenate an arbitrary number of audio segments together. + + :param *segments: Input speech segments + :type *segments: SpeechSegment + :return: Speech segment instance. + :rtype: SpeechSegment + :raises ValueError: If number of segments is zero, or if sample_rate + not match between two audio segments + :raises TypeError: If item of segments is not Audiosegment instance + """ + # Perform basic sanity-checks. + if len(segments) == 0: + raise ValueError("No audio segments are given to concatenate.") + sample_rate = segments[0]._sample_rate + for seg in segments: + if sample_rate != seg._sample_rate: + raise ValueError("Can't concatenate segments with " + "different sample rates") + if type(seg) is not cls: + raise TypeError("Only speech segments of the same type " + "instance can be concatenated.") + samples = np.concatenate([seg.samples for seg in segments]) + return cls(samples, sample_rate, seg._transcript) + @property def transcript(self): """Return the transcript text.