You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
253 lines
8.4 KiB
253 lines
8.4 KiB
"""Contains the audio segment class."""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import numpy as np
|
|
import io
|
|
import soundfile
|
|
|
|
|
|
class AudioSegment(object):
    """Monaural audio segment abstraction.

    Samples are stored internally as a one-dimensional float32 array;
    multi-channel input is averaged down to a single channel.

    :param samples: Audio samples [num_samples x num_channels].
    :type samples: ndarray
    :param sample_rate: Audio sample rate.
    :type sample_rate: int
    :raises TypeError: If the sample data type is not float or int.
    """

    # Instances are mutable and compare by value, so they must not be hashed.
    __hash__ = None

    def __init__(self, samples, sample_rate):
        """Create audio segment from samples.

        Samples are converted to float32 internally, with signed integers
        scaled to [-1, 1].
        """
        self._samples = self._convert_samples_to_float32(samples)
        self._sample_rate = sample_rate
        if self._samples.ndim >= 2:
            # Down-mix to mono by averaging over the channel axis.
            self._samples = np.mean(self._samples, 1)

    def __eq__(self, other):
        """Return whether two objects are equal."""
        if type(other) is not type(self):
            return False
        if self._sample_rate != other._sample_rate:
            return False
        # array_equal checks the shape first and then every element; it reads
        # the private buffers directly so no copy is made.
        return bool(np.array_equal(self._samples, other._samples))

    def __ne__(self, other):
        """Return whether two objects are unequal."""
        return not self.__eq__(other)

    def __str__(self):
        """Return human-readable representation of segment."""
        return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, "
                "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate,
                                self.duration, self.rms_db))

    @classmethod
    def from_file(cls, file):
        """Create audio segment from audio file.

        :param file: Filepath or file object to audio file.
        :type file: basestring|file
        :return: Audio segment instance.
        :rtype: AudioSegment
        """
        samples, sample_rate = soundfile.read(file, dtype='float32')
        return cls(samples, sample_rate)

    @classmethod
    def from_bytes(cls, bytes):
        """Create audio segment from a byte string containing an audio file.

        The byte string is wrapped in an in-memory file and decoded by
        soundfile.

        :param bytes: Byte string containing the encoded audio.
        :type bytes: str
        :return: Audio segment instance.
        :rtype: AudioSegment
        """
        samples, sample_rate = soundfile.read(
            io.BytesIO(bytes), dtype='float32')
        return cls(samples, sample_rate)

    def to_wav_file(self, filepath, dtype='float32'):
        """Save audio segment to disk as wav file.

        :param filepath: WAV filepath or file object to save the
                         audio segment.
        :type filepath: basestring|file
        :param dtype: Subtype for audio file. Options: 'int16', 'int32',
                      'float32', 'float64'. Default is 'float32'.
        :type dtype: str
        :raises TypeError: If dtype is not supported.
        """
        subtype_map = {
            'int16': 'PCM_16',
            'int32': 'PCM_32',
            'float32': 'FLOAT',
            'float64': 'DOUBLE',
        }
        # Validate before converting so an unsupported dtype fails with the
        # documented TypeError instead of a KeyError after the work is done.
        # np.dtype() itself raises TypeError for unknown type strings.
        dtype_name = np.dtype(dtype).name
        if dtype_name not in subtype_map:
            raise TypeError("Unsupported dtype for WAV export: %s." % dtype)
        samples = self._convert_samples_from_float32(self._samples, dtype)
        soundfile.write(
            filepath,
            samples,
            self._sample_rate,
            format='WAV',
            subtype=subtype_map[dtype_name])

    def to_bytes(self, dtype='float32'):
        """Create a byte string containing the raw audio samples.

        :param dtype: Data type for export samples. Options: 'int16', 'int32',
                      'float32', 'float64'. Default is 'float32'.
        :type dtype: str
        :return: Byte string containing audio content.
        :rtype: str
        :raises TypeError: If dtype is neither a signed integer nor a float
                           type.
        """
        samples = self._convert_samples_from_float32(self._samples, dtype)
        # tobytes() replaces the deprecated tostring() alias.
        return samples.tobytes()

    def apply_gain(self, gain):
        """Apply gain in decibels to samples.

        Note that this is an in-place transformation.

        :param gain: Gain in decibels to apply to samples.
        :type gain: float
        """
        # Decibels are an amplitude ratio of 20 * log10, hence the / 20.
        self._samples *= 10.**(gain / 20.)

    def change_speed(self, speed_rate):
        """Change the audio speed by linear interpolation.

        Note that this is an in-place transformation.

        :param speed_rate: Rate of speed change:
                           speed_rate > 1.0, speed up the audio;
                           speed_rate = 1.0, unchanged;
                           speed_rate < 1.0, slow down the audio;
                           speed_rate <= 0.0, not allowed, raise ValueError.
        :type speed_rate: float
        :raises ValueError: If speed_rate <= 0.0.
        """
        if speed_rate <= 0:
            raise ValueError("speed_rate should be greater than zero.")
        old_length = self._samples.shape[0]
        if old_length == 0:
            # Nothing to interpolate; np.interp rejects empty sample points.
            return
        new_length = int(old_length / speed_rate)
        old_indices = np.arange(old_length)
        # endpoint=False keeps the new grid on [0, old_length), so a rate of
        # 1.0 reproduces the original indices exactly.
        new_indices = np.linspace(
            start=0, stop=old_length, num=new_length, endpoint=False)
        resampled = np.interp(new_indices, old_indices, self._samples)
        # np.interp returns float64; keep the internal float32 invariant.
        self._samples = resampled.astype('float32')

    def normalize(self, target_sample_rate):
        """Not implemented yet.

        :raises NotImplementedError: Always.
        """
        # NOTE(review): the parameter name looks copied from resample();
        # confirm the intended argument before implementing.
        raise NotImplementedError()

    def resample(self, target_sample_rate):
        """Not implemented yet.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def pad_silence(self, duration, sides='both'):
        """Not implemented yet.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def subsegment(self, start_sec=None, end_sec=None):
        """Not implemented yet.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def convolve(self, filter, allow_resample=False):
        """Not implemented yet.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def convolve_and_normalize(self, filter, allow_resample=False):
        """Not implemented yet.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    @property
    def samples(self):
        """Return audio samples.

        A copy is returned so callers cannot mutate the internal buffer.

        :return: Audio samples.
        :rtype: ndarray
        """
        return self._samples.copy()

    @property
    def sample_rate(self):
        """Return audio sample rate.

        :return: Audio sample rate.
        :rtype: int
        """
        return self._sample_rate

    @property
    def num_samples(self):
        """Return number of samples.

        :return: Number of samples.
        :rtype: int
        """
        return self._samples.shape[0]

    @property
    def duration(self):
        """Return audio duration.

        :return: Audio duration in seconds.
        :rtype: float
        """
        return self._samples.shape[0] / float(self._sample_rate)

    @property
    def rms_db(self):
        """Return root mean square energy of the audio in decibels.

        :return: Root mean square energy in decibels.
        :rtype: float
        """
        # square root => multiply by 10 instead of 20 for dBs
        mean_square = np.mean(self._samples**2)
        return 10 * np.log10(mean_square)

    def _convert_samples_to_float32(self, samples):
        """Convert sample type to float32.

        Audio sample type is usually integer or float-point.
        Signed integers will be scaled to [-1, 1] in float32.

        :param samples: Input samples.
        :type samples: ndarray
        :return: Samples as float32.
        :rtype: ndarray
        :raises TypeError: If the dtype is neither signed integer nor float.
        """
        kind = samples.dtype.kind
        if kind == 'i':
            bits = np.iinfo(samples.dtype).bits
            # Divide by the magnitude of the most negative value so the
            # integer range maps onto [-1, 1).
            scale = np.float32(1. / 2**(bits - 1))
            return samples.astype('float32') * scale
        if kind == 'f':
            return samples.astype('float32')
        raise TypeError("Unsupported sample type: %s." % samples.dtype)

    def _convert_samples_from_float32(self, samples, dtype):
        """Convert sample type from float32 to dtype.

        Audio sample type is usually integer or float-point. For integer
        type, float32 will be rescaled from [-1, 1] to the maximum range
        supported by the integer type, saturating instead of wrapping.

        This is for writing an audio file.

        :param samples: Samples to convert; the input is not modified.
        :type samples: ndarray
        :param dtype: Target data type.
        :type dtype: str|numpy.dtype
        :return: Converted samples.
        :rtype: ndarray
        :raises TypeError: If dtype is neither signed integer nor float.
        """
        dtype = np.dtype(dtype)
        # Work in float64: float32 cannot represent the int32 limits, which
        # would let a full-scale sample overflow during the final cast.
        output_samples = samples.astype('float64')
        if dtype.kind == 'i':
            full_scale = 2.0**(np.iinfo(dtype).bits - 1)
            # Largest float64 strictly below full scale. After truncation it
            # equals the integer maximum for widths up to 32 bits and stays
            # safely below it for int64.
            upper = np.nextafter(full_scale, 0.0)
            output_samples = np.clip(
                output_samples * full_scale, -full_scale, upper)
        elif dtype.kind == 'f':
            limits = np.finfo(dtype)
            output_samples = np.clip(output_samples, limits.min, limits.max)
        else:
            raise TypeError("Unsupported sample type: %s." % dtype)
        return output_samples.astype(dtype)
|