resolve conflicts in requirements.txt

pull/2/head
Yibing Liu 7 years ago
commit 90d83bf739

@ -6,7 +6,7 @@ from __future__ import print_function
import numpy as np import numpy as np
import io import io
import soundfile import soundfile
import scikits.samplerate import resampy
from scipy import signal from scipy import signal
import random import random
import copy import copy
@ -308,7 +308,7 @@ class AudioSegment(object):
prior_mean_squared = 10.**(prior_db / 10.) prior_mean_squared = 10.**(prior_db / 10.)
prior_sum_of_squares = prior_mean_squared * prior_samples prior_sum_of_squares = prior_mean_squared * prior_samples
cumsum_of_squares = np.cumsum(self.samples**2) cumsum_of_squares = np.cumsum(self.samples**2)
sample_count = np.arange(len(self.num_samples)) + 1 sample_count = np.arange(self.num_samples) + 1
if startup_sample_idx > 0: if startup_sample_idx > 0:
cumsum_of_squares[:startup_sample_idx] = \ cumsum_of_squares[:startup_sample_idx] = \
cumsum_of_squares[startup_sample_idx] cumsum_of_squares[startup_sample_idx]
@ -321,21 +321,19 @@ class AudioSegment(object):
gain_db = target_db - rms_estimate_db gain_db = target_db - rms_estimate_db
self.gain_db(gain_db) self.gain_db(gain_db)
def resample(self, target_sample_rate, quality='sinc_medium'): def resample(self, target_sample_rate, filter='kaiser_best'):
"""Resample the audio to a target sample rate. """Resample the audio to a target sample rate.
Note that this is an in-place transformation. Note that this is an in-place transformation.
:param target_sample_rate: Target sample rate. :param target_sample_rate: Target sample rate.
:type target_sample_rate: int :type target_sample_rate: int
:param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. :param filter: The resampling filter to use one of {'kaiser_best',
Sets resampling speed/quality tradeoff. 'kaiser_fast'}.
See http://www.mega-nerd.com/SRC/api_misc.html#Converters :type filter: str
:type quality: str
""" """
resample_ratio = target_sample_rate / self._sample_rate self._samples = resampy.resample(
self._samples = scikits.samplerate.resample( self.samples, self.sample_rate, target_sample_rate, filter=filter)
self._samples, r=resample_ratio, type=quality)
self._sample_rate = target_sample_rate self._sample_rate = target_sample_rate
def pad_silence(self, duration, sides='both'): def pad_silence(self, duration, sides='both'):

@ -7,6 +7,10 @@ import json
import random import random
from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor
from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
from data_utils.augmentor.resample import ResampleAugmentor
from data_utils.augmentor.online_bayesian_normalization import \
OnlineBayesianNormalizationAugmentor
class AugmentationPipeline(object): class AugmentationPipeline(object):
@ -79,5 +83,11 @@ class AugmentationPipeline(object):
return VolumePerturbAugmentor(self._rng, **params) return VolumePerturbAugmentor(self._rng, **params)
elif augmentor_type == "shift": elif augmentor_type == "shift":
return ShiftPerturbAugmentor(self._rng, **params) return ShiftPerturbAugmentor(self._rng, **params)
elif augmentor_type == "speed":
return SpeedPerturbAugmentor(self._rng, **params)
elif augmentor_type == "resample":
return ResampleAugmentor(self._rng, **params)
elif augmentor_type == "bayesian_normal":
return OnlineBayesianNormalizationAugmentor(self._rng, **params)
else: else:
raise ValueError("Unknown augmentor type [%s]." % augmentor_type) raise ValueError("Unknown augmentor type [%s]." % augmentor_type)

@ -0,0 +1,48 @@
"""Contain the online bayesian normalization augmentation model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_utils.augmentor.base import AugmentorBase
class OnlineBayesianNormalizationAugmentor(AugmentorBase):
    """Augmentor that applies online Bayesian normalization to audio.

    The constructor only records its arguments; the normalization itself is
    delegated to ``normalize_online_bayesian`` on the audio segment handed
    to ``transform_audio``.

    :param rng: Random generator object. Stored on the instance but not
                read by this class.
    :type rng: random.Random
    :param target_db: Target RMS value in decibels.
    :type target_db: float
    :param prior_db: Prior RMS estimate in decibels.
    :type prior_db: float
    :param prior_samples: Prior strength in number of samples.
    :type prior_samples: int
    :param startup_delay: Number of seconds (default 0.0) during which
                          statistics are accrued before online
                          normalization is applied.
    :type startup_delay: float
    """

    def __init__(self,
                 rng,
                 target_db,
                 prior_db,
                 prior_samples,
                 startup_delay=0.0):
        self._rng = rng
        self._startup_delay = startup_delay
        self._prior_samples = prior_samples
        self._prior_db = prior_db
        self._target_db = target_db

    def transform_audio(self, audio_segment):
        """Normalize the given audio with the online Bayesian approach.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to normalize.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        # Arguments are forwarded positionally, in the order:
        # target_db, prior_db, prior_samples, startup_delay.
        normalization_args = (self._target_db, self._prior_db,
                              self._prior_samples, self._startup_delay)
        audio_segment.normalize_online_bayesian(*normalization_args)

@ -0,0 +1,33 @@
"""Contain the resample augmentation model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_utils.augmentor.base import AugmentorBase
class ResampleAugmentor(AugmentorBase):
    """Augmentor that resamples audio to a fixed sample rate.

    Background on resampling:
    https://ccrma.stanford.edu/~jos/resample/index.html

    :param rng: Random generator object. Stored on the instance but not
                read by this class.
    :type rng: random.Random
    :param new_sample_rate: New sample rate in Hz.
    :type new_sample_rate: int
    """

    def __init__(self, rng, new_sample_rate):
        self._rng = rng
        self._new_sample_rate = new_sample_rate

    def transform_audio(self, audio_segment):
        """Resample the given audio to the configured sample rate.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to resample.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        target_rate = self._new_sample_rate
        audio_segment.resample(target_rate)

@ -0,0 +1,47 @@
"""Contain the speech perturbation augmentation model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_utils.augmentor.base import AugmentorBase
class SpeedPerturbAugmentor(AugmentorBase):
    """Augmentor that changes the speed of audio by a randomly drawn rate.

    Reference paper:
    http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf

    :param rng: Random generator object used to draw the speed rate.
    :type rng: random.Random
    :param min_speed_rate: Lower bound of the speed rate to sample; must
                           not be smaller than 0.9.
    :type min_speed_rate: float
    :param max_speed_rate: Upper bound of the speed rate to sample; must
                           not be larger than 1.1.
    :type max_speed_rate: float
    :raises ValueError: If min_speed_rate is below 0.9 or max_speed_rate
                        is above 1.1.
    """

    def __init__(self, rng, min_speed_rate, max_speed_rate):
        # Validate both bounds before any state is stored; the lower bound
        # is checked first, so its error wins when both are out of range.
        if min_speed_rate < 0.9:
            raise ValueError(
                "Sampling speed below 0.9 can cause unnatural effects")
        if max_speed_rate > 1.1:
            raise ValueError(
                "Sampling speed above 1.1 can cause unnatural effects")
        self._rng = rng
        self._max_speed_rate = max_speed_rate
        self._min_speed_rate = min_speed_rate

    def transform_audio(self, audio_segment):
        """Draw a speed rate from the configured range and apply it.

        The rate is obtained from ``rng.uniform(min_speed_rate,
        max_speed_rate)`` and passed to ``change_speed`` on the segment.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment whose speed is changed.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        lower, upper = self._min_speed_rate, self._max_speed_rate
        speed_rate = self._rng.uniform(lower, upper)
        audio_segment.change_speed(speed_rate)

@ -37,4 +37,4 @@ class VolumePerturbAugmentor(AugmentorBase):
:type audio_segment: AudioSegment|SpeechSegment :type audio_segment: AudioSegment|SpeechSegment
""" """
gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS) gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS)
audio_segment.apply_gain(gain) audio_segment.gain_db(gain)

@ -1,4 +1,4 @@
SoundFile==0.9.0.post1
wget==3.2 wget==3.2
scipy==0.13.1 scipy==0.13.1
resampy==0.1.5
https://github.com/kpu/kenlm/archive/master.zip https://github.com/kpu/kenlm/archive/master.zip

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
# install python dependencies # install python dependencies
if [ -f 'requirements.txt' ]; then if [ -f "requirements.txt" ]; then
pip install -r requirements.txt pip install -r requirements.txt
fi fi
if [ $? != 0 ]; then if [ $? != 0 ]; then
@ -9,21 +9,21 @@ if [ $? != 0 ]; then
exit 1 exit 1
fi fi
# install scikits.samplerate # install package Soundfile
curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz" curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz"
if [ $? != 0 ]; then if [ $? != 0 ]; then
echo "Download libsamplerate-0.1.9.tar.gz failed !!!" echo "Download libsndfile-1.0.28.tar.gz failed !!!"
exit 1 exit 1
fi fi
tar -xvf libsamplerate-0.1.9.tar.gz tar -zxvf libsndfile-1.0.28.tar.gz
cd libsamplerate-0.1.9 cd libsndfile-1.0.28
./configure && make && make install ./configure && make && make install
cd - cd -
rm -rf libsamplerate-0.1.9 rm -rf libsndfile-1.0.28
rm libsamplerate-0.1.9.tar.gz rm libsndfile-1.0.28.tar.gz
pip install scikits.samplerate==0.3.3 pip install SoundFile==0.9.0.post1
if [ $? != 0 ]; then if [ $? != 0 ]; then
echo "Install scikits.samplerate failed !!!" echo "Install SoundFile failed !!!"
exit 1 exit 1
fi fi

Loading…
Cancel
Save