PaddleSpeech/data_utils/speech.py

"""Contains the speech segment class."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from data_utils.audio import AudioSegment


class SpeechSegment(AudioSegment):
    """Speech segment abstraction, a subclass of AudioSegment,
    with an additional transcript.

    :param samples: Audio samples [num_samples x num_channels].
    :type samples: ndarray.float32
    :param sample_rate: Audio sample rate.
    :type sample_rate: int
    :param transcript: Transcript text for the speech.
    :type transript: basestring
    :raises TypeError: If the sample data type is not float or int.
    """

    def __init__(self, samples, sample_rate, transcript):
        AudioSegment.__init__(self, samples, sample_rate)
        self._transcript = transcript

    def __eq__(self, other):
        """Return whether two objects are equal.
        """
        if not AudioSegment.__eq__(self, other):
            return False
        if self._transcript != other._transcript:
            return False
        return True

    def __ne__(self, other):
        """Return whether two objects are unequal."""
        return not self.__eq__(other)

    @classmethod
    def from_file(cls, filepath, transcript):
        """Create speech segment from audio file and corresponding transcript.
        
        :param filepath: Filepath or file object to audio file.
        :type filepath: basestring|file
        :param transcript: Transcript text for the speech.
        :type transript: basestring
        :return: Speech segment instance.
        :rtype: SpeechSegment
        """
        audio = AudioSegment.from_file(filepath)
        return cls(audio.samples, audio.sample_rate, transcript)

    @classmethod
    def from_bytes(cls, bytes, transcript):
        """Create speech segment from a byte string and corresponding
        transcript.
        
        :param bytes: Byte string containing audio samples.
        :type bytes: str
        :param transcript: Transcript text for the speech.
        :type transript: basestring
        :return: Speech segment instance.
        :rtype: Speech Segment
        """
        audio = AudioSegment.from_bytes(bytes)
        return cls(audio.samples, audio.sample_rate, transcript)

    @classmethod
    def concatenate(cls, *segments):
        """Concatenate an arbitrary number of speech segments together, both
        audio and transcript will be concatenated.

        :param *segments: Input speech segments to be concatenated.
        :type *segments: tuple of SpeechSegment
        :return: Speech segment instance.
        :rtype: SpeechSegment
        :raises ValueError: If the number of segments is zero, or if the 
                            sample_rate of any two segments does not match.
        :raises TypeError: If any segment is not SpeechSegment instance.
        """
        if len(segments) == 0:
            raise ValueError("No speech segments are given to concatenate.")
        sample_rate = segments[0]._sample_rate
        transcripts = ""
        for seg in segments:
            if sample_rate != seg._sample_rate:
                raise ValueError("Can't concatenate segments with "
                                 "different sample rates")
            if type(seg) is not cls:
                raise TypeError("Only speech segments of the same type "
                                "instance can be concatenated.")
            transcripts += seg._transcript
        samples = np.concatenate([seg.samples for seg in segments])
        return cls(samples, sample_rate, transcripts)

    @classmethod
    def slice_from_file(cls, filepath, transcript, start=None, end=None):
        """Loads a small section of an speech without having to load
        the entire file into the memory which can be incredibly wasteful.

        :param filepath: Filepath or file object to audio file.
        :type filepath: basestring|file
        :param start: Start time in seconds. If start is negative, it wraps
                      around from the end. If not provided, this function 
                      reads from the very beginning.
        :type start: float
        :param end: End time in seconds. If end is negative, it wraps around
                    from the end. If not provided, the default behvaior is
                    to read to the end of the file.
        :type end: float
        :param transcript: Transcript text for the speech. if not provided, 
                           the defaults is an empty string.
        :type transript: basestring
        :return: SpeechSegment instance of the specified slice of the input
                 speech file.
        :rtype: SpeechSegment
        """
        audio = AudioSegment.slice_from_file(filepath, start, end)
        return cls(audio.samples, audio.sample_rate, transcript)

    @classmethod
    def make_silence(cls, duration, sample_rate):
        """Creates a silent speech segment of the given duration and
        sample rate, transcript will be an empty string.

        :param duration: Length of silence in seconds.
        :type duration: float
        :param sample_rate: Sample rate.
        :type sample_rate: float
        :return: Silence of the given duration.
        :rtype: SpeechSegment
        """
        audio = AudioSegment.make_silence(duration, sample_rate)
        return cls(audio.samples, audio.sample_rate, "")

    @property
    def transcript(self):
        """Return the transcript text.

        :return: Transcript text for the speech.
        :rtype: basestring
        """
        return self._transcript
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Contains the speech segment class."""`
			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`from data_utils.audio import AudioSegment`


			`class SpeechSegment(AudioSegment):`
			`"""Speech segment abstraction, a subclass of AudioSegment,`
			`with an additional transcript.`

			`:param samples: Audio samples [num_samples x num_channels].`
			`:type samples: ndarray.float32`
			`:param sample_rate: Audio sample rate.`
			`:type sample_rate: int`
			`:param transcript: Transcript text for the speech.`
			`:type transript: basestring`
			`:raises TypeError: If the sample data type is not float or int.`
			`"""`

			`def __init__(self, samples, sample_rate, transcript):`
			`AudioSegment.__init__(self, samples, sample_rate)`
			`self._transcript = transcript`

			`def __eq__(self, other):`
			`"""Return whether two objects are equal.`
			`"""`
			`if not AudioSegment.__eq__(self, other):`
			`return False`
			`if self._transcript != other._transcript:`
			`return False`
			`return True`

			`def __ne__(self, other):`
			`"""Return whether two objects are unequal."""`
			`return not self.__eq__(other)`

			`@classmethod`
			`def from_file(cls, filepath, transcript):`
			`"""Create speech segment from audio file and corresponding transcript.`

			`:param filepath: Filepath or file object to audio file.`
			`:type filepath: basestring\|file`
			`:param transcript: Transcript text for the speech.`
			`:type transript: basestring`
Add multiprocess version of xmap_reader to speedup training. Add seqbin data parser to adapt to internal 1w data training. 7 years ago			`:return: Speech segment instance.`
			`:rtype: SpeechSegment`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""`
			`audio = AudioSegment.from_file(filepath)`
			`return cls(audio.samples, audio.sample_rate, transcript)`

			`@classmethod`
			`def from_bytes(cls, bytes, transcript):`
			`"""Create speech segment from a byte string and corresponding`
			`transcript.`

			`:param bytes: Byte string containing audio samples.`
			`:type bytes: str`
			`:param transcript: Transcript text for the speech.`
			`:type transript: basestring`
Add multiprocess version of xmap_reader to speedup training. Add seqbin data parser to adapt to internal 1w data training. 7 years ago			`:return: Speech segment instance.`
			`:rtype: Speech Segment`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""`
			`audio = AudioSegment.from_bytes(bytes)`
			`return cls(audio.samples, audio.sample_rate, transcript)`

modify audio and speech 7 years ago			`@classmethod`
			`def concatenate(cls, *segments):`
add audio file 7 years ago			`"""Concatenate an arbitrary number of speech segments together, both`
			`audio and transcript will be concatenated.`
modify audio and speech 7 years ago
add audio file 7 years ago			`:param *segments: Input speech segments to be concatenated.`
			`:type *segments: tuple of SpeechSegment`
modify audio and speech 7 years ago			`:return: Speech segment instance.`
			`:rtype: SpeechSegment`
add audio file 7 years ago			`:raises ValueError: If the number of segments is zero, or if the`
			`sample_rate of any two segments does not match.`
add audio file 7 years ago			`:raises TypeError: If any segment is not SpeechSegment instance.`
modify audio and speech 7 years ago			`"""`
			`if len(segments) == 0:`
add audio augmentation 7 years ago			`raise ValueError("No speech segments are given to concatenate.")`
modify audio and speech 7 years ago			`sample_rate = segments[0]._sample_rate`
add audio file 7 years ago			`transcripts = ""`
modify audio and speech 7 years ago			`for seg in segments:`
			`if sample_rate != seg._sample_rate:`
			`raise ValueError("Can't concatenate segments with "`
			`"different sample rates")`
			`if type(seg) is not cls:`
			`raise TypeError("Only speech segments of the same type "`
			`"instance can be concatenated.")`
add audio file 7 years ago			`transcripts += seg._transcript`
modify audio and speech 7 years ago			`samples = np.concatenate([seg.samples for seg in segments])`
add audio file 7 years ago			`return cls(samples, sample_rate, transcripts)`

			`@classmethod`
Add multi-threading support for DS2 data generator. 7 years ago			`def slice_from_file(cls, filepath, transcript, start=None, end=None):`
add audio file 7 years ago			`"""Loads a small section of an speech without having to load`
			`the entire file into the memory which can be incredibly wasteful.`

			`:param filepath: Filepath or file object to audio file.`
			`:type filepath: basestring\|file`
			`:param start: Start time in seconds. If start is negative, it wraps`
			`around from the end. If not provided, this function`
			`reads from the very beginning.`
			`:type start: float`
			`:param end: End time in seconds. If end is negative, it wraps around`
			`from the end. If not provided, the default behvaior is`
			`to read to the end of the file.`
			`:type end: float`
			`:param transcript: Transcript text for the speech. if not provided,`
			`the defaults is an empty string.`
			`:type transript: basestring`
			`:return: SpeechSegment instance of the specified slice of the input`
			`speech file.`
			`:rtype: SpeechSegment`
			`"""`
Add ImpulseResponseAugmentor and augmentation.config file. 7 years ago			`audio = AudioSegment.slice_from_file(filepath, start, end)`
add audio augmentation 7 years ago			`return cls(audio.samples, audio.sample_rate, transcript)`
add audio file 7 years ago
			`@classmethod`
			`def make_silence(cls, duration, sample_rate):`
			`"""Creates a silent speech segment of the given duration and`
add audio file 7 years ago			`sample rate, transcript will be an empty string.`
add audio file 7 years ago
			`:param duration: Length of silence in seconds.`
			`:type duration: float`
			`:param sample_rate: Sample rate.`
			`:type sample_rate: float`
			`:return: Silence of the given duration.`
add audio augmentation 7 years ago			`:rtype: SpeechSegment`
add audio file 7 years ago			`"""`
			`audio = AudioSegment.make_silence(duration, sample_rate)`
			`return cls(audio.samples, audio.sample_rate, "")`
modify audio and speech 7 years ago
Add function, class and module docs for data parts in DS2. 7 years ago			`@property`
			`def transcript(self):`
			`"""Return the transcript text.`

			`:return: Transcript text for the speech.`
			`:rtype: basestring`
			`"""`
			`return self._transcript`