Add 3 augmentor classes and change the resample module

pull/2/head
chrisxu2016 8 years ago
parent 5398360e5f
commit 2450591a44

@ -6,7 +6,7 @@ from __future__ import print_function
import numpy as np
import io
import soundfile
import scikits.samplerate
import resampy
from scipy import signal
import random
import copy
@ -321,21 +321,19 @@ class AudioSegment(object):
gain_db = target_db - rms_estimate_db
self.apply_gain(gain_db)
def resample(self, target_sample_rate, quality='sinc_medium'):
def resample(self, target_sample_rate, filter='kaiser_best'):
"""Resample the audio to a target sample rate.
Note that this is an in-place transformation: both the stored samples
and the stored sample rate are replaced.
:param target_sample_rate: Target sample rate.
:type target_sample_rate: int
:param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}.
Sets resampling speed/quality tradeoff.
See http://www.mega-nerd.com/SRC/api_misc.html#Converters
:type quality: str
:param filter: The resampling filter to use, one of {'kaiser_best',
'kaiser_fast'}. Forwarded unchanged to resampy.resample.
:type filter: str
"""
resample_ratio = target_sample_rate / self._sample_rate
self._samples = scikits.samplerate.resample(
self._samples, r=resample_ratio, type=quality)
self._samples = resampy.resample(
self.samples, self.sample_rate, target_sample_rate, filter=filter)
self._sample_rate = target_sample_rate
def pad_silence(self, duration, sides='both'):

@ -8,6 +8,9 @@ from data_utils.augmentor.base import AugmentorBase
class ResampleAugmentor(AugmentorBase):
"""Augmentation model for resampling.
See more info here:
https://ccrma.stanford.edu/~jos/resample/index.html
:param rng: Random generator object.
:type rng: random.Random
@ -27,4 +30,4 @@ class ResampleAugmentor(AugmentorBase):
:param audio: Audio segment to add effects to.
:type audio: AudioSegment|SpeechSegment
"""
audio_segment.resample(self._new_sample_rate)
audio_segment.resample(self._new_sample_rate)

@ -1,3 +1,4 @@
SoundFile==0.9.0.post1
wget==3.2
scipy==0.13.1
resampy==0.1.5

@ -1,64 +0,0 @@
"""Test augmentor class."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from data_utils import audio
from data_utils.augmentor.augmentation import AugmentationPipeline
import random
import numpy as np
# Fixed seed handed to every AugmentationPipeline built by the tests below.
random_seed = 0

# Deliberately tiny sample rate so the 20-sample fixture spans two "seconds".
samplerate = 10

# Twenty low-amplitude samples used as the shared input waveform.
audio_data = np.array([
    3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,
    -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,
    -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,
    -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,
    -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03,
])
class TestAugmentor(unittest.TestCase):
    """Smoke tests for single augmentors driven through AugmentationPipeline.

    Every test builds a one-entry pipeline from a JSON config with
    ``"prob": 1.0``, applies it to a fresh ``AudioSegment`` created from the
    module-level ``audio_data`` / ``samplerate`` fixture, and then inspects
    the transformed segment.
    """

    def _augment(self, config_json):
        """Apply the pipeline described by ``config_json`` to a new segment.

        :param config_json: JSON text describing the augmentation pipeline.
        :type config_json: str
        :return: The segment after ``transform_audio`` has been applied to it.
        """
        aug_pipeline = AugmentationPipeline(
            augmentation_config=config_json, random_seed=random_seed)
        audio_seg = audio.AudioSegment(audio_data, samplerate)
        aug_pipeline.transform_audio(audio_seg)
        return audio_seg

    def _assert_all_samples_changed(self, audio_seg):
        """Fail if any sample of ``audio_seg`` equals the untouched fixture.

        :param audio_seg: Segment that has already been transformed.
        """
        # The reference segment is created after the transform, as in the
        # original tests.
        orig_audio = audio.AudioSegment(audio_data, samplerate)
        original = np.asarray(orig_audio.samples)
        transformed = np.asarray(audio_seg.samples)
        if original.shape != transformed.shape:
            # A different sample count already proves the audio changed.
            # Comparing mismatched shapes with == used to yield a scalar
            # False (so the assertion passed); newer NumPy releases raise
            # instead, so the comparison is skipped here.
            return
        self.assertFalse(np.any(transformed == original))

    def test_volume(self):
        """A random gain within [-15, 15] dBFS must alter every sample."""
        config_json = ('[{"type": "volume","params": {"min_gain_dBFS": -15, '
                       '"max_gain_dBFS": 15},"prob": 1.0}]')
        self._assert_all_samples_changed(self._augment(config_json))

    def test_speed(self):
        """A speed rate within [1.2, 1.4] must alter the samples."""
        config_json = ('[{"type":"speed","params": {"min_speed_rate": 1.2,'
                       '"max_speed_rate": 1.4},"prob": 1.0}]')
        self._assert_all_samples_changed(self._augment(config_json))

    def test_resample(self):
        """Resampling to 5 must leave the segment reporting that rate."""
        config_json = ('[{"type":"resample","params": {"new_sample_rate":5},'
                       '"prob": 1.0}]')
        audio_seg = self._augment(config_json)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(audio_seg.sample_rate, 5)

    def test_bayesial(self):
        """The bayesian_normal augmentor must alter every sample."""
        config_json = (
            '[{"type":"bayesian_normal","params":{"target_db":-20,'
            '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]'
        )
        self._assert_all_samples_changed(self._augment(config_json))
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()
Loading…
Cancel
Save