You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/data_utils/augmentor/speed_perturb.py

54 lines
1.6 KiB

"""Speed perturbation module for making ASR robust to different voice
types (high pitched, low pitched, etc)
Samples uniformly between speed_min and speed_max
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from . import base
class SpeedPerturbatioAugmentor(base.AugmentorBase):
"""
Instantiates a speed perturbation module.
See reference paper here:
http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf
:param speed_min: Lower bound on new rate to sample
:type speed_min: func[int->scalar]
:param speed_max: Upper bound on new rate to sample
:type speed_max: func[int->scalar]
"""
def __init__(self, rng, speed_min, speed_max):
if (speed_min < 0.9):
raise ValueError(
"Sampling speed below 0.9 can cause unnatural effects")
if (speed_min > 1.1):
raise ValueError(
"Sampling speed above 1.1 can cause unnatural effects")
self.speed_min = speed_min
self.speed_max = speed_max
self.rng = rng
def transform_audio(self, audio_segment):
"""
Samples a new speed rate from the given range and
changes the speed of the given audio clip.
Note that this is an in-place transformation.
:param audio_segment: input audio
:type audio_segment: SpeechDLSegment
"""
read_size = 0
speed_min = self.speed_min(iteration)
speed_max = self.speed_max(iteration)
sampled_speed = rng.uniform(speed_min, speed_max)
audio = audio.change_speed(sampled_speed)