You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.6 KiB
54 lines
1.6 KiB
"""Speed perturbation module for making ASR robust to different voice
|
|
types (high pitched, low pitched, etc)
|
|
Samples uniformly between speed_min and speed_max
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
from . import base
|
|
|
|
|
|
class SpeedPerturbatioAugmentor(base.AugmentorBase):
|
|
"""
|
|
Instantiates a speed perturbation module.
|
|
|
|
See reference paper here:
|
|
|
|
http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf
|
|
|
|
:param speed_min: Lower bound on new rate to sample
|
|
:type speed_min: func[int->scalar]
|
|
:param speed_max: Upper bound on new rate to sample
|
|
:type speed_max: func[int->scalar]
|
|
"""
|
|
|
|
def __init__(self, rng, speed_min, speed_max):
|
|
|
|
if (speed_min < 0.9):
|
|
raise ValueError(
|
|
"Sampling speed below 0.9 can cause unnatural effects")
|
|
if (speed_min > 1.1):
|
|
raise ValueError(
|
|
"Sampling speed above 1.1 can cause unnatural effects")
|
|
self.speed_min = speed_min
|
|
self.speed_max = speed_max
|
|
self.rng = rng
|
|
|
|
def transform_audio(self, audio_segment):
|
|
"""
|
|
Samples a new speed rate from the given range and
|
|
changes the speed of the given audio clip.
|
|
|
|
Note that this is an in-place transformation.
|
|
|
|
:param audio_segment: input audio
|
|
:type audio_segment: SpeechDLSegment
|
|
"""
|
|
read_size = 0
|
|
speed_min = self.speed_min(iteration)
|
|
speed_max = self.speed_max(iteration)
|
|
sampled_speed = rng.uniform(speed_min, speed_max)
|
|
audio = audio.change_speed(sampled_speed)
|