From 115a06bb3739715d75cdadc3b6bc813acd328c99 Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Tue, 20 Jun 2017 16:24:03 +0800
Subject: [PATCH 01/12] add augmentor class

---
 data_utils/audio.py                           |  2 +-
 data_utils/augmentor/augmentation.py          |  9 ++++
 .../online_bayesian_normalization.py          | 50 +++++++++++++++++++
 data_utils/augmentor/resample.py              | 30 +++++++++++
 data_utils/augmentor/speed_perturb.py         | 43 ++++++++++++++++
 data_utils/augmentor/volume_perturb.py        |  2 +-
 6 files changed, 134 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 data_utils/audio.py
 mode change 100644 => 100755 data_utils/augmentor/augmentation.py
 create mode 100755 data_utils/augmentor/online_bayesian_normalization.py
 create mode 100755 data_utils/augmentor/resample.py
 create mode 100755 data_utils/augmentor/speed_perturb.py
 mode change 100644 => 100755 data_utils/augmentor/volume_perturb.py

diff --git a/data_utils/audio.py b/data_utils/audio.py
old mode 100644
new mode 100755
index 5d02feb6..03e2d5e4
--- a/data_utils/audio.py
+++ b/data_utils/audio.py
@@ -308,7 +308,7 @@ class AudioSegment(object):
         prior_mean_squared = 10.**(prior_db / 10.)
         prior_sum_of_squares = prior_mean_squared * prior_samples
         cumsum_of_squares = np.cumsum(self.samples**2)
-        sample_count = np.arange(len(self.num_samples)) + 1
+        sample_count = np.arange(self.num_samples) + 1
         if startup_sample_idx > 0:
             cumsum_of_squares[:startup_sample_idx] = \
                 cumsum_of_squares[startup_sample_idx]
diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py
old mode 100644
new mode 100755
index abe1a0ec..bfe7075e
--- a/data_utils/augmentor/augmentation.py
+++ b/data_utils/augmentor/augmentation.py
@@ -6,6 +6,9 @@ from __future__ import print_function
 import json
 import random
 from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
+from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
+from data_utils.augmentor.resample import ResampleAugmentor
+from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor
 
 
 class AugmentationPipeline(object):
@@ -76,5 +79,11 @@ class AugmentationPipeline(object):
         """Return an augmentation model by the type name, and pass in params."""
         if augmentor_type == "volume":
             return VolumePerturbAugmentor(self._rng, **params)
+        if augmentor_type == "speed":
+            return SpeedPerturbAugmentor(self._rng, **params)
+        if augmentor_type == "resample":
+            return ResampleAugmentor(self._rng, **params)
+        if augmentor_type == "baysian_normal":
+            return OnlineBayesianNormalizationAugmentor(self._rng, **params)
         else:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py
new file mode 100755
index 00000000..bb999912
--- /dev/null
+++ b/data_utils/augmentor/online_bayesian_normalization.py
@@ -0,0 +1,50 @@
+"""Contain the online bayesian normalization augmentation model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from data_utils.augmentor.base import AugmentorBase
+
+
+class OnlineBayesianNormalizationAugmentor(AugmentorBase):
+    """Augmentation model for adding online bayesian normalization.
+
+    :param rng: Random generator object.
+    :type rng: random.Random
+    :param target_db: Target RMS value in decibels.
+    :type target_db: float
+    :param prior_db: Prior RMS estimate in decibels.
+    :type prior_db: float
+    :param prior_samples: Prior strength in number of samples.
+    :type prior_samples: int
+    :param startup_delay: Default 0.0s. If provided, this function will
+                          accrue statistics for the first startup_delay 
+                          seconds before applying online normalization.
+    :type startup_delay: float
+    """
+
+    def __init__(self,
+                 rng,
+                 target_db,
+                 prior_db,
+                 prior_samples,
+                 startup_delay=0.0):
+        self._target_db = target_db
+        self._prior_db = prior_db
+        self._prior_samples = prior_samples
+        self._startup_delay = startup_delay
+        self._rng = rng
+        self._startup_delay=startup_delay
+
+    def transform_audio(self, audio_segment):
+        """Normalizes the input audio using the online Bayesian approach.
+
+        Note that this is an in-place transformation.
+
+        :param audio_segment: Audio segment to add effects to.
+        :type audio_segment: AudioSegment|SpeechSegment
+        """
+        audio_segment.normalize_online_bayesian(self._target_db,
+                                                self._prior_db,
+                                                self._prior_samples,
+                                                self._startup_delay)
diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py
new file mode 100755
index 00000000..88ef7ed0
--- /dev/null
+++ b/data_utils/augmentor/resample.py
@@ -0,0 +1,30 @@
+"""Contain the resample augmentation model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from data_utils.augmentor.base import AugmentorBase
+
+
+class ResampleAugmentor(AugmentorBase):
+    """Augmentation model for resampling.
+    
+    :param rng: Random generator object.
+    :type rng: random.Random
+    :param new_sample_rate: New sample rate in Hz
+    :type new_sample_rate: int
+    """
+
+    def __init__(self, rng, new_sample_rate):
+        self._new_sample_rate = new_sample_rate
+        self._rng = rng
+
+    def transform_audio(self, audio_segment):
+        """Resamples the input audio to a target sample rate.
+
+        Note that this is an in-place transformation.
+
+        :param audio: Audio segment to add effects to.
+        :type audio: AudioSegment|SpeechSegment
+        """
+        audio_segment.resample(self._new_sample_rate)
\ No newline at end of file
diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py
new file mode 100755
index 00000000..67de344c
--- /dev/null
+++ b/data_utils/augmentor/speed_perturb.py
@@ -0,0 +1,43 @@
+"""Contain the speed perturbation augmentation model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from data_utils.augmentor.base import AugmentorBase
+
+
+class SpeedPerturbAugmentor(AugmentorBase):
+    """Augmentation model for adding speed perturbation.
+
+    See reference paper here:
+    http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf
+
+    :param rng: Random generator object.
+    :type rng: random.Random
+    :param min_speed_rate: Lower bound of new speed rate to sample.
+    :type min_speed_rate: float
+    :param max_speed_rate: Upper bound of new speed rate to sample.
+    :type max_speed_rate: float
+    """
+
+    def __init__(self, rng, min_speed_rate, max_speed_rate):
+
+        if (min_speed_rate < 0.5):
+            raise ValueError("Sampling speed below 0.9 can cause unnatural effects")
+        if (max_speed_rate > 1.5):
+            raise ValueError("Sampling speed above 1.1 can cause unnatural effects")
+        self._min_speed_rate = min_speed_rate
+        self._max_speed_rate = max_speed_rate
+        self._rng = rng
+
+    def transform_audio(self, audio_segment):
+        """Sample a new speed rate from the given range and
+        change the speed of the given audio clip.
+
+        Note that this is an in-place transformation.
+
+        :param audio_segment: Audio segment to add effects to.
+        :type audio_segment: AudioSegment|SpeechSegment
+        """
+        sampled_speed = self._rng.uniform(self._min_speed_rate, self._max_speed_rate)
+        audio_segment.change_speed(sampled_speed)
diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py
old mode 100644
new mode 100755
index a5a9f6ca..62631fb0
--- a/data_utils/augmentor/volume_perturb.py
+++ b/data_utils/augmentor/volume_perturb.py
@@ -36,5 +36,5 @@ class VolumePerturbAugmentor(AugmentorBase):
         :param audio_segment: Audio segment to add effects to.
         :type audio_segment: AudioSegmenet|SpeechSegment
         """
-        gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS)
+        gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS)
         audio_segment.apply_gain(gain)

From 71283d619da6fe0b11d26fde2c701118b55fc25a Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Tue, 20 Jun 2017 16:33:28 +0800
Subject: [PATCH 02/12] add augmentor class

---
 data_utils/augmentor/resample.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py
index 88ef7ed0..6634bbd5 100755
--- a/data_utils/augmentor/resample.py
+++ b/data_utils/augmentor/resample.py
@@ -11,7 +11,7 @@ class ResampleAugmentor(AugmentorBase):
     
     :param rng: Random generator object.
     :type rng: random.Random
-    :param new_sample_rate: New sample rate in Hz
+    :param new_sample_rate: New sample rate in Hz.
     :type new_sample_rate: int
     """
 
@@ -27,4 +27,4 @@ class ResampleAugmentor(AugmentorBase):
         :param audio: Audio segment to add effects to.
         :type audio: AudioSegment|SpeechSegment
         """
-        audio_segment.resample(self._new_sample_rate)
\ No newline at end of file
+        audio_segment.resample(self._new_sample_rate)

From d64f470078056e1a0e3828ef30c6127596caa30c Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Tue, 20 Jun 2017 18:19:43 +0800
Subject: [PATCH 03/12] add augmentor class

---
 data_utils/augmentor/augmentation.py |  2 +-
 tests/test_augmentor.py              | 60 ++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100755 tests/test_augmentor.py

diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py
index bfe7075e..08788008 100755
--- a/data_utils/augmentor/augmentation.py
+++ b/data_utils/augmentor/augmentation.py
@@ -83,7 +83,7 @@ class AugmentationPipeline(object):
             return SpeedPerturbAugmentor(self._rng, **params)
         if augmentor_type == "resample":
             return ResampleAugmentor(self._rng, **params)
-        if augmentor_type == "baysian_normal":
+        if augmentor_type == "bayesian_normal":
             return OnlineBayesianNormalizationAugmentor(self._rng, **params)
         else:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py
new file mode 100755
index 00000000..76fd321a
--- /dev/null
+++ b/tests/test_augmentor.py
@@ -0,0 +1,60 @@
+"""Test augmentor class."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+from data_utils import audio
+from data_utils.augmentor.augmentation import AugmentationPipeline
+import random
+import numpy as np
+
+random_seed=0
+#audio instance
+audio_data=[3.05175781e-05, -8.54492188e-04, -1.09863281e-03, -9.46044922e-04,\
+            -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.10571289e-03,\
+            -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.46044922e-04,\
+            -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.31933594e-03,\
+            -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.38037109e-03]
+audio_data = np.array(audio_data)
+samplerate = 10
+
+class TestAugmentor(unittest.TestCase):
+    def test_volume(self):
+        augmentation_config='[{"type": "volume","params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},"prob": 1.0}]'
+        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
+                                                     random_seed=random_seed)
+        audio_segment = audio.AudioSegment(audio_data, samplerate)
+        augmentation_pipeline.transform_audio(audio_segment)
+        original_audio = audio.AudioSegment(audio_data, samplerate)
+        self.assertFalse(np.any(audio_segment.samples == original_audio.samples))
+
+    def test_speed(self):
+        augmentation_config='[{"type": "speed","params": {"min_speed_rate": 1.2,"max_speed_rate": 1.4},"prob": 1.0}]'
+        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
+                                                     random_seed=random_seed)
+        audio_segment = audio.AudioSegment(audio_data, samplerate)
+        augmentation_pipeline.transform_audio(audio_segment)
+        original_audio = audio.AudioSegment(audio_data, samplerate)
+        self.assertFalse(np.any(audio_segment.samples == original_audio.samples))
+
+    def test_resample(self):
+        augmentation_config='[{"type": "resample","params": {"new_sample_rate":5},"prob": 1.0}]'
+        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
+                                                     random_seed=random_seed)
+        audio_segment = audio.AudioSegment(audio_data, samplerate)
+        augmentation_pipeline.transform_audio(audio_segment)
+        self.assertTrue(audio_segment.sample_rate == 5)
+
+    def test_bayesial(self):
+        augmentation_config='[{"type": "bayesian_normal","params": {"target_db": -20, "prior_db": -4, "prior_samples": -8, "startup_delay": 0.0},"prob": 1.0}]'
+        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
+                                                     random_seed=random_seed)
+        audio_segment = audio.AudioSegment(audio_data, samplerate)
+        augmentation_pipeline.transform_audio(audio_segment)
+        original_audio = audio.AudioSegment(audio_data, samplerate)
+        self.assertFalse(np.any(audio_segment.samples == original_audio.samples))
+
+if __name__ == '__main__':
+    unittest.main()
+

From df77c6d5dbb35a2ebd332aa9ad7044bddb52fe5e Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Tue, 20 Jun 2017 18:39:48 +0800
Subject: [PATCH 04/12] Add 3 augmentor classes and related unittests

---
 tests/test_augmentor.py | 68 ++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py
index 76fd321a..17491704 100755
--- a/tests/test_augmentor.py
+++ b/tests/test_augmentor.py
@@ -11,49 +11,53 @@ import numpy as np
 
 random_seed=0
 #audio instance
-audio_data=[3.05175781e-05, -8.54492188e-04, -1.09863281e-03, -9.46044922e-04,\
-            -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.10571289e-03,\
-            -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.46044922e-04,\
-            -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.31933594e-03,\
-            -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.38037109e-03]
+audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\
+            -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\
+            -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\
+            -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\
+            -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03]
 audio_data = np.array(audio_data)
 samplerate = 10
 
 class TestAugmentor(unittest.TestCase):
     def test_volume(self):
-        augmentation_config='[{"type": "volume","params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},"prob": 1.0}]'
-        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
-                                                     random_seed=random_seed)
-        audio_segment = audio.AudioSegment(audio_data, samplerate)
-        augmentation_pipeline.transform_audio(audio_segment)
-        original_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_segment.samples == original_audio.samples))
+        config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\
+        '"max_gain_dBFS": 15},"prob": 1.0}]'
+        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
+                                            random_seed=random_seed)
+        audio_seg = audio.AudioSegment(audio_data, samplerate)
+        aug_pipeline.transform_audio(audio_seg)
+        orig_audio = audio.AudioSegment(audio_data, samplerate)
+        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
 
     def test_speed(self):
-        augmentation_config='[{"type": "speed","params": {"min_speed_rate": 1.2,"max_speed_rate": 1.4},"prob": 1.0}]'
-        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
-                                                     random_seed=random_seed)
-        audio_segment = audio.AudioSegment(audio_data, samplerate)
-        augmentation_pipeline.transform_audio(audio_segment)
-        original_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_segment.samples == original_audio.samples))
+        config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \
+        '"max_speed_rate": 1.4},"prob": 1.0}]'
+        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
+                                            random_seed=random_seed)
+        audio_seg = audio.AudioSegment(audio_data, samplerate)
+        aug_pipeline.transform_audio(audio_seg)
+        orig_audio = audio.AudioSegment(audio_data, samplerate)
+        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
 
     def test_resample(self):
-        augmentation_config='[{"type": "resample","params": {"new_sample_rate":5},"prob": 1.0}]'
-        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
-                                                     random_seed=random_seed)
-        audio_segment = audio.AudioSegment(audio_data, samplerate)
-        augmentation_pipeline.transform_audio(audio_segment)
-        self.assertTrue(audio_segment.sample_rate == 5)
+        config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\
+        '"prob": 1.0}]'
+        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
+                                            random_seed=random_seed)
+        audio_seg = audio.AudioSegment(audio_data, samplerate)
+        aug_pipeline.transform_audio(audio_seg)
+        self.assertTrue(audio_seg.sample_rate == 5)
 
     def test_bayesial(self):
-        augmentation_config='[{"type": "bayesian_normal","params": {"target_db": -20, "prior_db": -4, "prior_samples": -8, "startup_delay": 0.0},"prob": 1.0}]'
-        augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config,
-                                                     random_seed=random_seed)
-        audio_segment = audio.AudioSegment(audio_data, samplerate)
-        augmentation_pipeline.transform_audio(audio_segment)
-        original_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_segment.samples == original_audio.samples))
+        config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \
+        '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]'
+        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
+                                            random_seed=random_seed)
+        audio_seg = audio.AudioSegment(audio_data, samplerate)
+        aug_pipeline.transform_audio(audio_seg)
+        orig_audio = audio.AudioSegment(audio_data, samplerate)
+        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
 
 if __name__ == '__main__':
     unittest.main()

From 5398360e5f5bcbc1d48945395204bd9b708a6768 Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Tue, 20 Jun 2017 18:50:13 +0800
Subject: [PATCH 05/12] Add 3 augmentor classes and related unittests

---
 tests/test_augmentor.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py
index 17491704..57596e63 100755
--- a/tests/test_augmentor.py
+++ b/tests/test_augmentor.py
@@ -9,8 +9,7 @@ from data_utils.augmentor.augmentation import AugmentationPipeline
 import random
 import numpy as np
 
-random_seed=0
-#audio instance
+random_seed = 0
 audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\
             -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\
             -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\
@@ -19,12 +18,13 @@ audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\
 audio_data = np.array(audio_data)
 samplerate = 10
 
+
 class TestAugmentor(unittest.TestCase):
     def test_volume(self):
         config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\
         '"max_gain_dBFS": 15},"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
-                                            random_seed=random_seed)
+        aug_pipeline = AugmentationPipeline(
+            augmentation_config=config_json, random_seed=random_seed)
         audio_seg = audio.AudioSegment(audio_data, samplerate)
         aug_pipeline.transform_audio(audio_seg)
         orig_audio = audio.AudioSegment(audio_data, samplerate)
@@ -33,8 +33,8 @@ class TestAugmentor(unittest.TestCase):
     def test_speed(self):
         config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \
         '"max_speed_rate": 1.4},"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
-                                            random_seed=random_seed)
+        aug_pipeline = AugmentationPipeline(
+            augmentation_config=config_json, random_seed=random_seed)
         audio_seg = audio.AudioSegment(audio_data, samplerate)
         aug_pipeline.transform_audio(audio_seg)
         orig_audio = audio.AudioSegment(audio_data, samplerate)
@@ -43,8 +43,8 @@ class TestAugmentor(unittest.TestCase):
     def test_resample(self):
         config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\
         '"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
-                                            random_seed=random_seed)
+        aug_pipeline = AugmentationPipeline(
+            augmentation_config=config_json, random_seed=random_seed)
         audio_seg = audio.AudioSegment(audio_data, samplerate)
         aug_pipeline.transform_audio(audio_seg)
         self.assertTrue(audio_seg.sample_rate == 5)
@@ -52,13 +52,13 @@ class TestAugmentor(unittest.TestCase):
     def test_bayesial(self):
         config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \
         '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]'
-        aug_pipeline = AugmentationPipeline(augmentation_config=config_json,
-                                            random_seed=random_seed)
+        aug_pipeline = AugmentationPipeline(
+            augmentation_config=config_json, random_seed=random_seed)
         audio_seg = audio.AudioSegment(audio_data, samplerate)
         aug_pipeline.transform_audio(audio_seg)
         orig_audio = audio.AudioSegment(audio_data, samplerate)
         self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
 
+
 if __name__ == '__main__':
     unittest.main()
-

From 2450591a440dfc863cce53152416e594bdfff6b3 Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Wed, 21 Jun 2017 11:47:15 +0800
Subject: [PATCH 06/12] add 3 augmentor class and change resample module

---
 data_utils/audio.py              | 16 ++++----
 data_utils/augmentor/resample.py |  5 ++-
 requirements.txt                 |  1 +
 tests/test_augmentor.py          | 64 --------------------------------
 4 files changed, 12 insertions(+), 74 deletions(-)
 mode change 100644 => 100755 requirements.txt
 delete mode 100755 tests/test_augmentor.py

diff --git a/data_utils/audio.py b/data_utils/audio.py
index 03e2d5e4..f80425ea 100755
--- a/data_utils/audio.py
+++ b/data_utils/audio.py
@@ -6,7 +6,7 @@ from __future__ import print_function
 import numpy as np
 import io
 import soundfile
-import scikits.samplerate
+import resampy
 from scipy import signal
 import random
 import copy
@@ -321,21 +321,19 @@ class AudioSegment(object):
         gain_db = target_db - rms_estimate_db
         self.apply_gain(gain_db)
 
-    def resample(self, target_sample_rate, quality='sinc_medium'):
+    def resample(self, target_sample_rate, filter='kaiser_best'):
         """Resample the audio to a target sample rate.
 
         Note that this is an in-place transformation.
 
         :param target_sample_rate: Target sample rate.
         :type target_sample_rate: int
-        :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}.
-                        Sets resampling speed/quality tradeoff.
-                        See http://www.mega-nerd.com/SRC/api_misc.html#Converters
-        :type quality: str
+        :param filter: The resampling filter to use one of {'kaiser_best',
+                       'kaiser_fast'}.               
+        :type filter: str
         """
-        resample_ratio = target_sample_rate / self._sample_rate
-        self._samples = scikits.samplerate.resample(
-            self._samples, r=resample_ratio, type=quality)
+        self._samples = resampy.resample(
+            self.samples, self.sample_rate, target_sample_rate, filter=filter)
         self._sample_rate = target_sample_rate
 
     def pad_silence(self, duration, sides='both'):
diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py
index 6634bbd5..529b5fec 100755
--- a/data_utils/augmentor/resample.py
+++ b/data_utils/augmentor/resample.py
@@ -8,6 +8,9 @@ from data_utils.augmentor.base import AugmentorBase
 
 class ResampleAugmentor(AugmentorBase):
     """Augmentation model for resampling.
+
+    See more info here:
+    https://ccrma.stanford.edu/~jos/resample/index.html
     
     :param rng: Random generator object.
     :type rng: random.Random
@@ -27,4 +30,4 @@ class ResampleAugmentor(AugmentorBase):
         :param audio: Audio segment to add effects to.
         :type audio: AudioSegment|SpeechSegment
         """
-        audio_segment.resample(self._new_sample_rate)
+        audio_segment.resample(self._new_sample_rate)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
old mode 100644
new mode 100755
index 0183ecf0..d712787f
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 SoundFile==0.9.0.post1
 wget==3.2
 scipy==0.13.1
+resampy==0.1.5
\ No newline at end of file
diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py
deleted file mode 100755
index 57596e63..00000000
--- a/tests/test_augmentor.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""Test augmentor class."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import unittest
-from data_utils import audio
-from data_utils.augmentor.augmentation import AugmentationPipeline
-import random
-import numpy as np
-
-random_seed = 0
-audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\
-            -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\
-            -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\
-            -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\
-            -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03]
-audio_data = np.array(audio_data)
-samplerate = 10
-
-
-class TestAugmentor(unittest.TestCase):
-    def test_volume(self):
-        config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\
-        '"max_gain_dBFS": 15},"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        orig_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
-
-    def test_speed(self):
-        config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \
-        '"max_speed_rate": 1.4},"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        orig_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
-
-    def test_resample(self):
-        config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\
-        '"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        self.assertTrue(audio_seg.sample_rate == 5)
-
-    def test_bayesial(self):
-        config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \
-        '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        orig_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
-
-
-if __name__ == '__main__':
-    unittest.main()

From d6a852a304babcd916d35c58ec0470162891c583 Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Wed, 21 Jun 2017 12:11:43 +0800
Subject: [PATCH 07/12] modify setup.sh to delete the install of libsamplerate

---
 .../augmentor/online_bayesian_normalization.py |  6 ++----
 setup.sh                                       | 18 ------------------
 2 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py
index bb999912..e488ac7d 100755
--- a/data_utils/augmentor/online_bayesian_normalization.py
+++ b/data_utils/augmentor/online_bayesian_normalization.py
@@ -32,9 +32,8 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase):
         self._target_db = target_db
         self._prior_db = prior_db
         self._prior_samples = prior_samples
-        self._startup_delay = startup_delay
         self._rng = rng
-        self._startup_delay=startup_delay
+        self._startup_delay = startup_delay
 
     def transform_audio(self, audio_segment):
         """Normalizes the input audio using the online Bayesian approach.
@@ -44,7 +43,6 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase):
         :param audio_segment: Audio segment to add effects to.
         :type audio_segment: AudioSegment|SpeechSegment
         """
-        audio_segment.normalize_online_bayesian(self._target_db,
-                                                self._prior_db,
+        audio_segment.normalize_online_bayesian(self._target_db, self._prior_db,
                                                 self._prior_samples,
                                                 self._startup_delay)
diff --git a/setup.sh b/setup.sh
index 1ae2a5ee..e0ce1c4e 100644
--- a/setup.sh
+++ b/setup.sh
@@ -9,22 +9,4 @@ if [ $? != 0 ]; then
     exit 1
 fi
 
-# install scikits.samplerate
-curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz"
-if [ $? != 0 ]; then
-    echo "Download libsamplerate-0.1.9.tar.gz failed !!!"
-    exit 1
-fi
-tar -xvf libsamplerate-0.1.9.tar.gz
-cd libsamplerate-0.1.9
-./configure && make && make install
-cd -
-rm -rf libsamplerate-0.1.9
-rm libsamplerate-0.1.9.tar.gz
-pip install scikits.samplerate==0.3.3
-if [ $? != 0 ]; then
-    echo "Install scikits.samplerate failed !!!"
-    exit 1
-fi
-
 echo "Install all dependencies successfully."

From b340d4ed2fbdc487b555e3395d3093410e014a98 Mon Sep 17 00:00:00 2001
From: chrisxu2016 <823254351@qq.com>
Date: Wed, 21 Jun 2017 12:18:33 +0800
Subject: [PATCH 08/12] wrap over-long lines in speed_perturb.py

---
 data_utils/augmentor/speed_perturb.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py
index 67de344c..3f880fbb 100755
--- a/data_utils/augmentor/speed_perturb.py
+++ b/data_utils/augmentor/speed_perturb.py
@@ -23,9 +23,11 @@ class SpeedPerturbAugmentor(AugmentorBase):
     def __init__(self, rng, min_speed_rate, max_speed_rate):
 
         if (min_speed_rate < 0.5):
-            raise ValueError("Sampling speed below 0.9 can cause unnatural effects")
+            raise ValueError("Sampling speed below 0.9 can cause unnatural "\
+                             "effects")
         if (max_speed_rate > 1.5):
-            raise ValueError("Sampling speed above 1.1 can cause unnatural effects")
+            raise ValueError("Sampling speed above 1.1 can cause unnatural "\
+                             "effects")
         self._min_speed_rate = min_speed_rate
         self._max_speed_rate = max_speed_rate
         self._rng = rng
@@ -39,5 +41,6 @@ class SpeedPerturbAugmentor(AugmentorBase):
         :param audio_segment: Audio segment to add effects to.
         :type audio_segment: AudioSegment|SpeechSegment
         """
-        sampled_speed = self._rng.uniform(self._min_speed_rate, self._max_speed_rate)
+        sampled_speed = self._rng.uniform(self._min_speed_rate,
+                                          self._max_speed_rate)
         audio_segment.change_speed(sampled_speed)

From 6d6cdf40576dff0086e221a3d5e761530e24f811 Mon Sep 17 00:00:00 2001
From: yangyaming <mxscmxsc@gmail.com>
Date: Mon, 26 Jun 2017 13:04:36 +0800
Subject: [PATCH 09/12] Refine SoundFile installation process.     1. Install
 libsndfile first.     2. Install SoundFile using pip.

---
 requirements.txt |  1 -
 setup.sh         | 20 ++++++++++----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 0183ecf0..79272e7e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-SoundFile==0.9.0.post1
 wget==3.2
 scipy==0.13.1
diff --git a/setup.sh b/setup.sh
index 1ae2a5ee..a801a0b2 100644
--- a/setup.sh
+++ b/setup.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # install python dependencies
-if [ -f 'requirements.txt' ]; then
+if [ -f "requirements.txt" ]; then
     pip install -r requirements.txt
 fi
 if [ $? != 0 ]; then
@@ -9,21 +9,21 @@ if [ $? != 0 ]; then
     exit 1
 fi
 
-# install scikits.samplerate
-curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz"
+# install package SoundFile
+curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz"
 if [ $? != 0 ]; then
-    echo "Download libsamplerate-0.1.9.tar.gz failed !!!"
+    echo "Download libsndfile-1.0.28.tar.gz failed !!!"
     exit 1
 fi
-tar -xvf libsamplerate-0.1.9.tar.gz
-cd libsamplerate-0.1.9
+tar -zxvf libsndfile-1.0.28.tar.gz
+cd libsndfile-1.0.28
 ./configure && make && make install
 cd -
-rm -rf libsamplerate-0.1.9
-rm libsamplerate-0.1.9.tar.gz
-pip install scikits.samplerate==0.3.3
+rm -rf libsndfile-1.0.28
+rm libsndfile-1.0.28.tar.gz
+pip install SoundFile==0.9.0.post1
 if [ $? != 0 ]; then
-    echo "Install scikits.samplerate failed !!!"
+    echo "Install SoundFile failed !!!"
     exit 1
 fi
 

From 29f6ae08076d9811ab6aae91ffff3c0dfaf7bc85 Mon Sep 17 00:00:00 2001
From: xushaoyong <xushaoyong@baidu.com>
Date: Tue, 27 Jun 2017 17:16:35 +0800
Subject: [PATCH 10/12] modify audio resample function

---
 data_utils/audio.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/data_utils/audio.py b/data_utils/audio.py
index 3d9b6c11..3891f5b9 100644
--- a/data_utils/audio.py
+++ b/data_utils/audio.py
@@ -332,7 +332,6 @@ class AudioSegment(object):
                        'kaiser_fast'}.
         :type filter: str
         """
-        resample_ratio = target_sample_rate / self._sample_rate
         self._samples = resampy.resample(
             self.samples, self.sample_rate, target_sample_rate, filter=filter)
         self._sample_rate = target_sample_rate

From e1e2914ec9f0972825c32c83531805ed458728d0 Mon Sep 17 00:00:00 2001
From: xushaoyong <xushaoyong@baidu.com>
Date: Tue, 27 Jun 2017 18:06:16 +0800
Subject: [PATCH 11/12] remove augmentor unittest

---
 tests/test_augmentor.py | 65 -----------------------------------------
 1 file changed, 65 deletions(-)
 delete mode 100644 tests/test_augmentor.py

diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py
deleted file mode 100644
index ee1f5439..00000000
--- a/tests/test_augmentor.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""Test augmentor class."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import unittest
-from data_utils import audio
-from data_utils.augmentor.augmentation import AugmentationPipeline
-import random
-import numpy as np
-
-random_seed = 0
-#audio instance
-audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\
-            -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\
-            -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\
-            -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\
-            -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03]
-audio_data = np.array(audio_data)
-samplerate = 10
-
-
-class TestAugmentor(unittest.TestCase):
-    def test_volume(self):
-        config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\
-        '"max_gain_dBFS": 15},"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        orig_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
-
-    def test_speed(self):
-        config_json = '[{"type":"speed","params": {"min_speed_rate": 0.9,' \
-        '"max_speed_rate": 1.1},"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        orig_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
-
-    def test_resample(self):
-        config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\
-        '"prob": 1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        self.assertTrue(audio_seg.sample_rate == 5)
-
-    def test_bayesial(self):
-        config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \
-        '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]'
-        aug_pipeline = AugmentationPipeline(
-            augmentation_config=config_json, random_seed=random_seed)
-        audio_seg = audio.AudioSegment(audio_data, samplerate)
-        aug_pipeline.transform_audio(audio_seg)
-        orig_audio = audio.AudioSegment(audio_data, samplerate)
-        self.assertFalse(np.any(audio_seg.samples == orig_audio.samples))
-
-
-if __name__ == '__main__':
-    unittest.main()

From db37c34919e5cb7377e8ed863a17d206a0d28c39 Mon Sep 17 00:00:00 2001
From: xushaoyong <xushaoyong@baidu.com>
Date: Tue, 27 Jun 2017 18:48:49 +0800
Subject: [PATCH 12/12] modify some detail of augmentor

---
 data_utils/augmentor/augmentation.py  | 3 ++-
 data_utils/augmentor/speed_perturb.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py
index f8fd214a..9dced473 100644
--- a/data_utils/augmentor/augmentation.py
+++ b/data_utils/augmentor/augmentation.py
@@ -9,7 +9,8 @@ from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
 from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor
 from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
 from data_utils.augmentor.resample import ResampleAugmentor
-from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor
+from data_utils.augmentor.online_bayesian_normalization import \
+     OnlineBayesianNormalizationAugmentor
 
 
 class AugmentationPipeline(object):
diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py
index 8c6c8b63..cc5738bd 100644
--- a/data_utils/augmentor/speed_perturb.py
+++ b/data_utils/augmentor/speed_perturb.py
@@ -15,10 +15,10 @@ class SpeedPerturbAugmentor(AugmentorBase):
     :param rng: Random generator object.
     :type rng: random.Random
     :param min_speed_rate: Lower bound of new speed rate to sample and should
-                           not below 0.9.
+                           not be smaller than 0.9.
     :type min_speed_rate: float
     :param max_speed_rate: Upper bound of new speed rate to sample and should
-                           not above 1.1.
+                           not be larger than 1.1.
     :type max_speed_rate: float
     """