Add 3 augmentor classes and change the resample module

pull/2/head
chrisxu2016 8 years ago
parent 5398360e5f
commit 2450591a44

@ -6,7 +6,7 @@ from __future__ import print_function
import numpy as np import numpy as np
import io import io
import soundfile import soundfile
import scikits.samplerate import resampy
from scipy import signal from scipy import signal
import random import random
import copy import copy
@ -321,21 +321,19 @@ class AudioSegment(object):
gain_db = target_db - rms_estimate_db gain_db = target_db - rms_estimate_db
self.apply_gain(gain_db) self.apply_gain(gain_db)
def resample(self, target_sample_rate, quality='sinc_medium'): def resample(self, target_sample_rate, filter='kaiser_best'):
"""Resample the audio to a target sample rate. """Resample the audio to a target sample rate.
Note that this is an in-place transformation. Note that this is an in-place transformation.
:param target_sample_rate: Target sample rate. :param target_sample_rate: Target sample rate.
:type target_sample_rate: int :type target_sample_rate: int
:param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. :param filter: The resampling filter to use, one of {'kaiser_best',
Sets resampling speed/quality tradeoff. 'kaiser_fast'}.
See http://www.mega-nerd.com/SRC/api_misc.html#Converters :type filter: str
:type quality: str
""" """
resample_ratio = target_sample_rate / self._sample_rate self._samples = resampy.resample(
self._samples = scikits.samplerate.resample( self.samples, self.sample_rate, target_sample_rate, filter=filter)
self._samples, r=resample_ratio, type=quality)
self._sample_rate = target_sample_rate self._sample_rate = target_sample_rate
def pad_silence(self, duration, sides='both'): def pad_silence(self, duration, sides='both'):

@ -8,6 +8,9 @@ from data_utils.augmentor.base import AugmentorBase
class ResampleAugmentor(AugmentorBase): class ResampleAugmentor(AugmentorBase):
"""Augmentation model for resampling. """Augmentation model for resampling.
See more info here:
https://ccrma.stanford.edu/~jos/resample/index.html
:param rng: Random generator object. :param rng: Random generator object.
:type rng: random.Random :type rng: random.Random
@ -27,4 +30,4 @@ class ResampleAugmentor(AugmentorBase):
:param audio: Audio segment to add effects to. :param audio: Audio segment to add effects to.
:type audio: AudioSegment|SpeechSegment :type audio: AudioSegment|SpeechSegment
""" """
audio_segment.resample(self._new_sample_rate) audio_segment.resample(self._new_sample_rate)

@ -1,3 +1,4 @@
SoundFile==0.9.0.post1 SoundFile==0.9.0.post1
wget==3.2 wget==3.2
scipy==0.13.1 scipy==0.13.1
resampy==0.1.5

@ -1,64 +0,0 @@
"""Test augmentor class."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from data_utils import audio
from data_utils.augmentor.augmentation import AugmentationPipeline
import random
import numpy as np
# Seed handed to every AugmentationPipeline built by the tests below.
random_seed = 0

# Twenty-sample fixture signal wrapped as a NumPy array; the tests build
# their AudioSegment objects from it.
audio_data = np.array([
    3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,
    -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,
    -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,
    -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,
    -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03,
])

# Sample rate paired with ``audio_data`` when constructing segments.
samplerate = 10
class TestAugmentor(unittest.TestCase):
    """Run single-step augmentation pipelines on the fixture signal.

    Every test builds an ``AugmentationPipeline`` from a one-entry JSON
    config whose ``prob`` is 1.0, applies it to a fresh ``AudioSegment``
    made from the module-level ``audio_data`` and ``samplerate``, and then
    checks the transformed segment.
    """

    def _augment(self, config_json):
        """Apply the pipeline described by ``config_json`` to a new segment.

        :param config_json: JSON text describing the augmentation steps.
        :type config_json: str
        :return: The segment after ``transform_audio`` has been called on it.
        :rtype: AudioSegment
        """
        aug_pipeline = AugmentationPipeline(
            augmentation_config=config_json, random_seed=random_seed)
        audio_seg = audio.AudioSegment(audio_data, samplerate)
        aug_pipeline.transform_audio(audio_seg)
        return audio_seg

    def _assert_samples_changed(self, audio_seg):
        """Assert that no sample of ``audio_seg`` equals the fixture's.

        :param audio_seg: Segment returned by :meth:`_augment`.
        :type audio_seg: AudioSegment
        """
        orig_audio = audio.AudioSegment(audio_data, samplerate)
        # NOTE(review): an augmentation that changes the number of samples
        # (presumably speed perturbation -- confirm against the augmentor)
        # produces arrays of different shapes. Elementwise ``==`` on such
        # arrays yielded a scalar False on older NumPy and raises on
        # NumPy >= 1.25, so treat a shape change as "samples changed"
        # instead of comparing elementwise.
        if np.shape(audio_seg.samples) != np.shape(orig_audio.samples):
            return
        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))

    def test_volume(self):
        """A "volume" step must leave no sample at its original value."""
        config_json = ('[{"type": "volume","params": {"min_gain_dBFS": -15, '
                       '"max_gain_dBFS": 15},"prob": 1.0}]')
        self._assert_samples_changed(self._augment(config_json))

    def test_speed(self):
        """A "speed" step must leave no sample at its original value."""
        config_json = ('[{"type":"speed","params": {"min_speed_rate": 1.2,'
                       '"max_speed_rate": 1.4},"prob": 1.0}]')
        self._assert_samples_changed(self._augment(config_json))

    def test_resample(self):
        """A "resample" step must set the segment's sample rate to 5."""
        config_json = ('[{"type":"resample","params": {"new_sample_rate":5},'
                       '"prob": 1.0}]')
        audio_seg = self._augment(config_json)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(audio_seg.sample_rate, 5)

    def test_bayesial(self):
        """A "bayesian_normal" step must leave no sample unchanged."""
        config_json = (
            '[{"type":"bayesian_normal","params":{"target_db":-20,'
            '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},'
            '"prob":1.0}]')
        self._assert_samples_changed(self._augment(config_json))
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()
Loading…
Cancel
Save