diff --git a/data_utils/audio.py b/data_utils/audio.py index 03e2d5e40..f80425eac 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -6,7 +6,7 @@ from __future__ import print_function import numpy as np import io import soundfile -import scikits.samplerate +import resampy from scipy import signal import random import copy @@ -321,21 +321,19 @@ class AudioSegment(object): gain_db = target_db - rms_estimate_db self.apply_gain(gain_db) - def resample(self, target_sample_rate, quality='sinc_medium'): + def resample(self, target_sample_rate, filter='kaiser_best'): """Resample the audio to a target sample rate. Note that this is an in-place transformation. :param target_sample_rate: Target sample rate. :type target_sample_rate: int - :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. - Sets resampling speed/quality tradeoff. - See http://www.mega-nerd.com/SRC/api_misc.html#Converters - :type quality: str + :param filter: The resampling filter to use, one of {'kaiser_best', + 'kaiser_fast'}. + :type filter: str """ - resample_ratio = target_sample_rate / self._sample_rate - self._samples = scikits.samplerate.resample( - self._samples, r=resample_ratio, type=quality) + self._samples = resampy.resample( + self.samples, self.sample_rate, target_sample_rate, filter=filter) self._sample_rate = target_sample_rate def pad_silence(self, duration, sides='both'): diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index 6634bbd53..529b5fec1 100755 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -8,6 +8,9 @@ from data_utils.augmentor.base import AugmentorBase class ResampleAugmentor(AugmentorBase): """Augmentation model for resampling. + + See more info here: + https://ccrma.stanford.edu/~jos/resample/index.html :param rng: Random generator object. :type rng: random.Random @@ -27,4 +30,4 @@ class ResampleAugmentor(AugmentorBase): :param audio: Audio segment to add effects to. 
:type audio: AudioSegment|SpeechSegment """ - audio_segment.resample(self._new_sample_rate) + audio_segment.resample(self._new_sample_rate) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 0183ecf01..d712787ff --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ SoundFile==0.9.0.post1 wget==3.2 scipy==0.13.1 +resampy==0.1.5 \ No newline at end of file diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py deleted file mode 100755 index 57596e63c..000000000 --- a/tests/test_augmentor.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Test augmentor class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import unittest -from data_utils import audio -from data_utils.augmentor.augmentation import AugmentationPipeline -import random -import numpy as np - -random_seed = 0 -audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] -audio_data = np.array(audio_data) -samplerate = 10 - - -class TestAugmentor(unittest.TestCase): - def test_volume(self): - config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ - '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_speed(self): - config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ - '"max_speed_rate": 1.4},"prob": 1.0}]' - aug_pipeline = 
AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_resample(self): - config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ - '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - self.assertTrue(audio_seg.sample_rate == 5) - - def test_bayesial(self): - config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ - '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - -if __name__ == '__main__': - unittest.main()