From 115a06bb3739715d75cdadc3b6bc813acd328c99 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 16:24:03 +0800 Subject: [PATCH 01/12] add augmentor class --- data_utils/audio.py | 2 +- data_utils/augmentor/augmentation.py | 9 ++++ .../online_bayesian_normalization.py | 50 +++++++++++++++++++ data_utils/augmentor/resample.py | 30 +++++++++++ data_utils/augmentor/speed_perturb.py | 43 ++++++++++++++++ data_utils/augmentor/volume_perturb.py | 2 +- 6 files changed, 134 insertions(+), 2 deletions(-) mode change 100644 => 100755 data_utils/audio.py mode change 100644 => 100755 data_utils/augmentor/augmentation.py create mode 100755 data_utils/augmentor/online_bayesian_normalization.py create mode 100755 data_utils/augmentor/resample.py create mode 100755 data_utils/augmentor/speed_perturb.py mode change 100644 => 100755 data_utils/augmentor/volume_perturb.py diff --git a/data_utils/audio.py b/data_utils/audio.py old mode 100644 new mode 100755 index 5d02feb6..03e2d5e4 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -308,7 +308,7 @@ class AudioSegment(object): prior_mean_squared = 10.**(prior_db / 10.) prior_sum_of_squares = prior_mean_squared * prior_samples cumsum_of_squares = np.cumsum(self.samples**2) - sample_count = np.arange(len(self.num_samples)) + 1 + sample_count = np.arange(self.num_samples) + 1 if startup_sample_idx > 0: cumsum_of_squares[:startup_sample_idx] = \ cumsum_of_squares[startup_sample_idx] diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py old mode 100644 new mode 100755 index abe1a0ec..bfe7075e --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -6,6 +6,9 @@ from __future__ import print_function import json import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor +from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor +from data_utils.augmentor.resample import ResampleAugmentor +from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor class AugmentationPipeline(object): @@ -76,5 +79,11 @@ class AugmentationPipeline(object): """Return an augmentation model by the type name, and pass in params.""" if augmentor_type == "volume": return VolumePerturbAugmentor(self._rng, **params) + if augmentor_type == "speed": + return SpeedPerturbAugmentor(self._rng, **params) + if augmentor_type == "resample": + return ResampleAugmentor(self._rng, **params) + if augmentor_type == "baysian_normal": + return OnlineBayesianNormalizationAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py new file mode 100755 index 00000000..bb999912 --- /dev/null +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -0,0 +1,50 @@ +"""Contain the online bayesian normalization augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class OnlineBayesianNormalizationAugmentor(AugmentorBase): + """Augmentation model for adding online bayesian normalization. + + :param rng: Random generator object. + :type rng: random.Random + :param target_db: Target RMS value in decibels. + :type target_db: float + :param prior_db: Prior RMS estimate in decibels. + :type prior_db: float + :param prior_samples: Prior strength in number of samples. + :type prior_samples: int + :param startup_delay: Default 0.0s. If provided, this function will + accrue statistics for the first startup_delay + seconds before applying online normalization. + :type starup_delay: float. + """ + + def __init__(self, + rng, + target_db, + prior_db, + prior_samples, + startup_delay=0.0): + self._target_db = target_db + self._prior_db = prior_db + self._prior_samples = prior_samples + self._startup_delay = startup_delay + self._rng = rng + self._startup_delay=startup_delay + + def transform_audio(self, audio_segment): + """Normalizes the input audio using the online Bayesian approach. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. + :type audio_segment: AudioSegment|SpeechSegment + """ + audio_segment.normalize_online_bayesian(self._target_db, + self._prior_db, + self._prior_samples, + self._startup_delay) diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py new file mode 100755 index 00000000..88ef7ed0 --- /dev/null +++ b/data_utils/augmentor/resample.py @@ -0,0 +1,30 @@ +"""Contain the resample augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class ResampleAugmentor(AugmentorBase): + """Augmentation model for resampling. + + :param rng: Random generator object. + :type rng: random.Random + :param new_sample_rate: New sample rate in Hz + :type new_sample_rate: int + """ + + def __init__(self, rng, new_sample_rate): + self._new_sample_rate = new_sample_rate + self._rng = rng + + def transform_audio(self, audio_segment): + """Resamples the input audio to a target sample rate. + + Note that this is an in-place transformation. + + :param audio: Audio segment to add effects to. + :type audio: AudioSegment|SpeechSegment + """ + audio_segment.resample(self._new_sample_rate) \ No newline at end of file diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py new file mode 100755 index 00000000..67de344c --- /dev/null +++ b/data_utils/augmentor/speed_perturb.py @@ -0,0 +1,43 @@ +"""Contain the speech perturbation augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class SpeedPerturbAugmentor(AugmentorBase): + """Augmentation model for adding speed perturbation. + + See reference paper here: + http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf + + :param rng: Random generator object. + :type rng: random.Random + :param min_speed_rate: Lower bound of new speed rate to sample. + :type min_speed_rate: float + :param max_speed_rate: Upper bound of new speed rate to sample. + :type max_speed_rate: float + """ + + def __init__(self, rng, min_speed_rate, max_speed_rate): + + if (min_speed_rate < 0.5): + raise ValueError("Sampling speed below 0.9 can cause unnatural effects") + if (max_speed_rate > 1.5): + raise ValueError("Sampling speed above 1.1 can cause unnatural effects") + self._min_speed_rate = min_speed_rate + self._max_speed_rate = max_speed_rate + self._rng = rng + + def transform_audio(self, audio_segment): + """Sample a new speed rate from the given range and + changes the speed of the given audio clip. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. + :type audio_segment: AudioSegment|SpeechSegment + """ + sampled_speed = self._rng.uniform(self._min_speed_rate, self._max_speed_rate) + audio_segment.change_speed(sampled_speed) diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py old mode 100644 new mode 100755 index a5a9f6ca..62631fb0 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -36,5 +36,5 @@ class VolumePerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS) + gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS) audio_segment.apply_gain(gain) From 71283d619da6fe0b11d26fde2c701118b55fc25a Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 16:33:28 +0800 Subject: [PATCH 02/12] add augmentor class --- data_utils/augmentor/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index 88ef7ed0..6634bbd5 100755 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -11,7 +11,7 @@ class ResampleAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random - :param new_sample_rate: New sample rate in Hz + :param new_sample_rate: New sample rate in Hz. :type new_sample_rate: int """ @@ -27,4 +27,4 @@ class ResampleAugmentor(AugmentorBase): :param audio: Audio segment to add effects to. :type audio: AudioSegment|SpeechSegment """ - audio_segment.resample(self._new_sample_rate) \ No newline at end of file + audio_segment.resample(self._new_sample_rate) From d64f470078056e1a0e3828ef30c6127596caa30c Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 18:19:43 +0800 Subject: [PATCH 03/12] add augmentor class --- data_utils/augmentor/augmentation.py | 2 +- tests/test_augmentor.py | 60 ++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100755 tests/test_augmentor.py diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index bfe7075e..08788008 100755 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -83,7 +83,7 @@ class AugmentationPipeline(object): return SpeedPerturbAugmentor(self._rng, **params) if augmentor_type == "resample": return ResampleAugmentor(self._rng, **params) - if augmentor_type == "baysian_normal": + if augmentor_type == "bayesian_normal": return OnlineBayesianNormalizationAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py new file mode 100755 index 00000000..76fd321a --- /dev/null +++ b/tests/test_augmentor.py @@ -0,0 +1,60 @@ +"""Test augmentor class.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +from data_utils import audio +from data_utils.augmentor.augmentation import AugmentationPipeline +import random +import numpy as np + +random_seed=0 +#audio instance +audio_data=[3.05175781e-05, -8.54492188e-04, -1.09863281e-03, -9.46044922e-04,\ + -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.10571289e-03,\ + -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.46044922e-04,\ + -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.31933594e-03,\ + -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.38037109e-03] +audio_data = np.array(audio_data) +samplerate = 10 + +class TestAugmentor(unittest.TestCase): + def test_volume(self): + augmentation_config='[{"type": "volume","params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + original_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + + def test_speed(self): + augmentation_config='[{"type": "speed","params": {"min_speed_rate": 1.2,"max_speed_rate": 1.4},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + original_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + + def test_resample(self): + augmentation_config='[{"type": "resample","params": {"new_sample_rate":5},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + self.assertTrue(audio_segment.sample_rate == 5) + + def test_bayesial(self): + augmentation_config='[{"type": "bayesian_normal","params": {"target_db": -20, "prior_db": -4, "prior_samples": -8, "startup_delay": 0.0},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + original_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + +if __name__ == '__main__': + unittest.main() + From df77c6d5dbb35a2ebd332aa9ad7044bddb52fe5e Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 18:39:48 +0800 Subject: [PATCH 04/12] Add 3 augmentor classes and related unittests --- tests/test_augmentor.py | 68 ++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py index 76fd321a..17491704 100755 --- a/tests/test_augmentor.py +++ b/tests/test_augmentor.py @@ -11,49 +11,53 @@ import numpy as np random_seed=0 #audio instance -audio_data=[3.05175781e-05, -8.54492188e-04, -1.09863281e-03, -9.46044922e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.10571289e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.46044922e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.31933594e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.38037109e-03] +audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ + -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ + -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ + -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ + -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] audio_data = np.array(audio_data) samplerate = 10 class TestAugmentor(unittest.TestCase): def test_volume(self): - augmentation_config='[{"type": "volume","params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - original_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ + '"max_gain_dBFS": 15},"prob": 1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + orig_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) def test_speed(self): - augmentation_config='[{"type": "speed","params": {"min_speed_rate": 1.2,"max_speed_rate": 1.4},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - original_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ + '"max_speed_rate": 1.4},"prob": 1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + orig_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) def test_resample(self): - augmentation_config='[{"type": "resample","params": {"new_sample_rate":5},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - self.assertTrue(audio_segment.sample_rate == 5) + config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ + '"prob": 1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + self.assertTrue(audio_seg.sample_rate == 5) def test_bayesial(self): - augmentation_config='[{"type": "bayesian_normal","params": {"target_db": -20, "prior_db": -4, "prior_samples": -8, "startup_delay": 0.0},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - original_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ + '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + orig_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) if __name__ == '__main__': unittest.main() From 5398360e5f5bcbc1d48945395204bd9b708a6768 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 18:50:13 +0800 Subject: [PATCH 05/12] Add 3 augmentor classes and related unittests --- tests/test_augmentor.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py index 17491704..57596e63 100755 --- a/tests/test_augmentor.py +++ b/tests/test_augmentor.py @@ -9,8 +9,7 @@ from data_utils.augmentor.augmentation import AugmentationPipeline import random import numpy as np -random_seed=0 -#audio instance +random_seed = 0 audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ @@ -19,12 +18,13 @@ audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ audio_data = np.array(audio_data) samplerate = 10 + class TestAugmentor(unittest.TestCase): def test_volume(self): config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) orig_audio = audio.AudioSegment(audio_data, samplerate) @@ -33,8 +33,8 @@ class TestAugmentor(unittest.TestCase): def test_speed(self): config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ '"max_speed_rate": 1.4},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) orig_audio = audio.AudioSegment(audio_data, samplerate) @@ -43,8 +43,8 @@ class TestAugmentor(unittest.TestCase): def test_resample(self): config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) self.assertTrue(audio_seg.sample_rate == 5) @@ -52,13 +52,13 @@ class TestAugmentor(unittest.TestCase): def test_bayesial(self): config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) orig_audio = audio.AudioSegment(audio_data, samplerate) self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) + if __name__ == '__main__': unittest.main() - From 2450591a440dfc863cce53152416e594bdfff6b3 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Wed, 21 Jun 2017 11:47:15 +0800 Subject: [PATCH 06/12] add 3 augmentor class and change resample module --- data_utils/audio.py | 16 ++++---- data_utils/augmentor/resample.py | 5 ++- requirements.txt | 1 + tests/test_augmentor.py | 64 -------------------------------- 4 files changed, 12 insertions(+), 74 deletions(-) mode change 100644 => 100755 requirements.txt delete mode 100755 tests/test_augmentor.py diff --git a/data_utils/audio.py b/data_utils/audio.py index 03e2d5e4..f80425ea 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -6,7 +6,7 @@ from __future__ import print_function import numpy as np import io import soundfile -import scikits.samplerate +import resampy from scipy import signal import random import copy @@ -321,21 +321,19 @@ class AudioSegment(object): gain_db = target_db - rms_estimate_db self.apply_gain(gain_db) - def resample(self, target_sample_rate, quality='sinc_medium'): + def resample(self, target_sample_rate, filter='kaiser_best'): """Resample the audio to a target sample rate. Note that this is an in-place transformation. :param target_sample_rate: Target sample rate. :type target_sample_rate: int - :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. - Sets resampling speed/quality tradeoff. - See http://www.mega-nerd.com/SRC/api_misc.html#Converters - :type quality: str + :param filter: The resampling filter to use one of {'kaiser_best', + 'kaiser_fast'}. + :type filter: str """ - resample_ratio = target_sample_rate / self._sample_rate - self._samples = scikits.samplerate.resample( - self._samples, r=resample_ratio, type=quality) + self._samples = resampy.resample( + self.samples, self.sample_rate, target_sample_rate, filter=filter) self._sample_rate = target_sample_rate def pad_silence(self, duration, sides='both'): diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index 6634bbd5..529b5fec 100755 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -8,6 +8,9 @@ from data_utils.augmentor.base import AugmentorBase class ResampleAugmentor(AugmentorBase): """Augmentation model for resampling. + + See more info here: + https://ccrma.stanford.edu/~jos/resample/index.html :param rng: Random generator object. :type rng: random.Random @@ -27,4 +30,4 @@ class ResampleAugmentor(AugmentorBase): :param audio: Audio segment to add effects to. :type audio: AudioSegment|SpeechSegment """ - audio_segment.resample(self._new_sample_rate) + audio_segment.resample(self._new_sample_rate) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 0183ecf0..d712787f --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ SoundFile==0.9.0.post1 wget==3.2 scipy==0.13.1 +resampy==0.1.5 \ No newline at end of file diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py deleted file mode 100755 index 57596e63..00000000 --- a/tests/test_augmentor.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Test augmentor class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import unittest -from data_utils import audio -from data_utils.augmentor.augmentation import AugmentationPipeline -import random -import numpy as np - -random_seed = 0 -audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] -audio_data = np.array(audio_data) -samplerate = 10 - - -class TestAugmentor(unittest.TestCase): - def test_volume(self): - config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ - '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_speed(self): - config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ - '"max_speed_rate": 1.4},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_resample(self): - config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ - '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - self.assertTrue(audio_seg.sample_rate == 5) - - def test_bayesial(self): - config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ - '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - -if __name__ == '__main__': - unittest.main() From d6a852a304babcd916d35c58ec0470162891c583 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Wed, 21 Jun 2017 12:11:43 +0800 Subject: [PATCH 07/12] modify setup.sh to delete the install of libsamplerate --- .../augmentor/online_bayesian_normalization.py | 6 ++---- setup.sh | 18 ------------------ 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py index bb999912..e488ac7d 100755 --- a/data_utils/augmentor/online_bayesian_normalization.py +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -32,9 +32,8 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase): self._target_db = target_db self._prior_db = prior_db self._prior_samples = prior_samples - self._startup_delay = startup_delay self._rng = rng - self._startup_delay=startup_delay + self._startup_delay = startup_delay def transform_audio(self, audio_segment): """Normalizes the input audio using the online Bayesian approach. @@ -44,7 +43,6 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegment|SpeechSegment """ - audio_segment.normalize_online_bayesian(self._target_db, - self._prior_db, + audio_segment.normalize_online_bayesian(self._target_db, self._prior_db, self._prior_samples, self._startup_delay) diff --git a/setup.sh b/setup.sh index 1ae2a5ee..e0ce1c4e 100644 --- a/setup.sh +++ b/setup.sh @@ -9,22 +9,4 @@ if [ $? != 0 ]; then exit 1 fi -# install scikits.samplerate -curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz" -if [ $? != 0 ]; then - echo "Download libsamplerate-0.1.9.tar.gz failed !!!" - exit 1 -fi -tar -xvf libsamplerate-0.1.9.tar.gz -cd libsamplerate-0.1.9 -./configure && make && make install -cd - -rm -rf libsamplerate-0.1.9 -rm libsamplerate-0.1.9.tar.gz -pip install scikits.samplerate==0.3.3 -if [ $? != 0 ]; then - echo "Install scikits.samplerate failed !!!" - exit 1 -fi - echo "Install all dependencies successfully." From b340d4ed2fbdc487b555e3395d3093410e014a98 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Wed, 21 Jun 2017 12:18:33 +0800 Subject: [PATCH 08/12] modify setup.sh to delete the install of libsamplerate --- data_utils/augmentor/speed_perturb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py index 67de344c..3f880fbb 100755 --- a/data_utils/augmentor/speed_perturb.py +++ b/data_utils/augmentor/speed_perturb.py @@ -23,9 +23,11 @@ class SpeedPerturbAugmentor(AugmentorBase): def __init__(self, rng, min_speed_rate, max_speed_rate): if (min_speed_rate < 0.5): - raise ValueError("Sampling speed below 0.9 can cause unnatural effects") + raise ValueError("Sampling speed below 0.9 can cause unnatural "\ + "effects") if (max_speed_rate > 1.5): - raise ValueError("Sampling speed above 1.1 can cause unnatural effects") + raise ValueError("Sampling speed above 1.1 can cause unnatural "\ + "effects") self._min_speed_rate = min_speed_rate self._max_speed_rate = max_speed_rate self._rng = rng @@ -39,5 +41,6 @@ class SpeedPerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegment|SpeechSegment """ - sampled_speed = self._rng.uniform(self._min_speed_rate, self._max_speed_rate) + sampled_speed = self._rng.uniform(self._min_speed_rate, + self._max_speed_rate) audio_segment.change_speed(sampled_speed) From 6d6cdf40576dff0086e221a3d5e761530e24f811 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 26 Jun 2017 13:04:36 +0800 Subject: [PATCH 09/12] Refine SoundFile installation process. 1. Install libsndfile first. 2. Install SoundFile using pip. --- requirements.txt | 1 - setup.sh | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0183ecf0..79272e7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -SoundFile==0.9.0.post1 wget==3.2 scipy==0.13.1 diff --git a/setup.sh b/setup.sh index 1ae2a5ee..a801a0b2 100644 --- a/setup.sh +++ b/setup.sh @@ -1,7 +1,7 @@ #!/bin/bash # install python dependencies -if [ -f 'requirements.txt' ]; then +if [ -f "requirements.txt" ]; then pip install -r requirements.txt fi if [ $? != 0 ]; then @@ -9,21 +9,21 @@ if [ $? != 0 ]; then exit 1 fi -# install scikits.samplerate -curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz" +# install package Soundfile +curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" if [ $? != 0 ]; then - echo "Download libsamplerate-0.1.9.tar.gz failed !!!" + echo "Download libsndfile-1.0.28.tar.gz failed !!!" exit 1 fi -tar -xvf libsamplerate-0.1.9.tar.gz -cd libsamplerate-0.1.9 +tar -zxvf libsndfile-1.0.28.tar.gz +cd libsndfile-1.0.28 ./configure && make && make install cd - -rm -rf libsamplerate-0.1.9 -rm libsamplerate-0.1.9.tar.gz -pip install scikits.samplerate==0.3.3 +rm -rf libsndfile-1.0.28 +rm libsndfile-1.0.28.tar.gz +pip install SoundFile==0.9.0.post1 if [ $? != 0 ]; then - echo "Install scikits.samplerate failed !!!" + echo "Install SoundFile failed !!!" exit 1 fi From 29f6ae08076d9811ab6aae91ffff3c0dfaf7bc85 Mon Sep 17 00:00:00 2001 From: xushaoyong Date: Tue, 27 Jun 2017 17:16:35 +0800 Subject: [PATCH 10/12] modify audio resample function --- data_utils/audio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 3d9b6c11..3891f5b9 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -332,7 +332,6 @@ class AudioSegment(object): 'kaiser_fast'}. :type filter: str """ - resample_ratio = target_sample_rate / self._sample_rate self._samples = resampy.resample( self.samples, self.sample_rate, target_sample_rate, filter=filter) self._sample_rate = target_sample_rate From e1e2914ec9f0972825c32c83531805ed458728d0 Mon Sep 17 00:00:00 2001 From: xushaoyong Date: Tue, 27 Jun 2017 18:06:16 +0800 Subject: [PATCH 11/12] remove augmentor unittest --- tests/test_augmentor.py | 65 ----------------------------------------- 1 file changed, 65 deletions(-) delete mode 100644 tests/test_augmentor.py diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py deleted file mode 100644 index ee1f5439..00000000 --- a/tests/test_augmentor.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Test augmentor class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import unittest -from data_utils import audio -from data_utils.augmentor.augmentation import AugmentationPipeline -import random -import numpy as np - -random_seed = 0 -#audio instance -audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] -audio_data = np.array(audio_data) -samplerate = 10 - - -class TestAugmentor(unittest.TestCase): - def test_volume(self): - config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ - '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_speed(self): - config_json = '[{"type":"speed","params": {"min_speed_rate": 0.9,' \ - '"max_speed_rate": 1.1},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_resample(self): - config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ - '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - self.assertTrue(audio_seg.sample_rate == 5) - - def test_bayesial(self): - config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ - '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - -if __name__ == '__main__': - unittest.main() From db37c34919e5cb7377e8ed863a17d206a0d28c39 Mon Sep 17 00:00:00 2001 From: xushaoyong Date: Tue, 27 Jun 2017 18:48:49 +0800 Subject: [PATCH 12/12] modify some detail of augmentor --- data_utils/augmentor/augmentation.py | 3 ++- data_utils/augmentor/speed_perturb.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index f8fd214a..9dced473 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -9,7 +9,8 @@ from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor from data_utils.augmentor.resample import ResampleAugmentor -from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor +from data_utils.augmentor.online_bayesian_normalization import \ + OnlineBayesianNormalizationAugmentor class AugmentationPipeline(object): diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py index 8c6c8b63..cc5738bd 100644 --- a/data_utils/augmentor/speed_perturb.py +++ b/data_utils/augmentor/speed_perturb.py @@ -15,10 +15,10 @@ class SpeedPerturbAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random :param min_speed_rate: Lower bound of new speed rate to sample and should - not below 0.9. + not be smaller than 0.9. :type min_speed_rate: float :param max_speed_rate: Upper bound of new speed rate to sample and should - not above 1.1. + not be larger than 1.1. :type max_speed_rate: float """