pull/673/head
Haoxin Ma 4 years ago
parent 82ca0f6549
commit aaeef54f46

@ -32,7 +32,8 @@ class ShiftPerturbAugmentor(AugmentorBase):
self._rng = rng
def randomize_parameters(self):
self.shift_ms = self._rng.uniform(self._min_shift_ms, self._max_shift_ms)
self.shift_ms = self._rng.uniform(self._min_shift_ms,
self._max_shift_ms)
def apply(self, audio_segment):
audio_segment.shift(self.shift_ms)
@ -49,7 +50,6 @@ class ShiftPerturbAugmentor(AugmentorBase):
# self.randomize_parameters()
# self.apply(audio_segment)
# def transform_audio(self, audio_segment):
# """Shift audio.
@ -60,5 +60,3 @@ class ShiftPerturbAugmentor(AugmentorBase):
# """
# shift_ms = self._rng.uniform(self._min_shift_ms, self._max_shift_ms)
# audio_segment.shift(shift_ms)

@ -174,7 +174,6 @@ class SpecAugmentor(AugmentorBase):
assert t_0 <= t_0 + t
return xs
# def mask_freq(self, xs, replace_with_zero=False):
# n_bins = xs.shape[0]
# for i in range(0, self.n_freq_masks):
@ -208,7 +207,6 @@ class SpecAugmentor(AugmentorBase):
# self._time_mask = (t_0, t_0 + t)
# return xs
# def transform_feature(self, xs: np.ndarray, single=True):
# """
# Args:

@ -79,7 +79,6 @@ class SpeedPerturbAugmentor(AugmentorBase):
self._rates = np.linspace(
self._min_rate, self._max_rate, self._num_rates, endpoint=True)
def randomize_parameters(self):
if self._num_rates < 0:
self.speed_rate = self._rng.uniform(self._min_rate, self._max_rate)

@ -174,7 +174,6 @@ class SpeechCollator():
self._stride_ms = stride_ms
self._target_sample_rate = target_sample_rate
self._speech_featurizer = SpeechFeaturizer(
unit_type=unit_type,
vocab_filepath=vocab_filepath,
@ -231,7 +230,8 @@ class SpeechCollator():
self._augmentation_pipeline.randomize_parameters_audio_transform()
def randomize_feature_parameters(self, n_frames, n_bins):
self._augmentation_pipeline.randomize_parameters_feature_transform(n_frames, n_bins)
self._augmentation_pipeline.randomize_parameters_feature_transform(
n_frames, n_bins)
def process_feature_and_transform(self, audio_file, transcript):
"""Load, augment, featurize and normalize for speech data.
@ -261,7 +261,6 @@ class SpeechCollator():
return specgram, transcript_part
# def process_utterance(self, audio_file, transcript, single=True):
# """Load, augment, featurize and normalize for speech data.
@ -282,7 +281,6 @@ class SpeechCollator():
# # audio augment
# self._augmentation_pipeline.transform_audio(speech_segment)
# # Spectrum transform
# specgram, transcript_part = self._speech_featurizer.featurize(
# speech_segment, self._keep_transcription_text)
@ -357,7 +355,9 @@ class SpeechCollator():
for i in range(len(padded_audios)):
if not self._randomize_each_batch:
self.randomize_feature_parameters(audio_lens[i], n_bins)
padded_audios[i] = self._augmentation_pipeline.apply_feature_transform(padded_audios[i])
padded_audios[
i] = self._augmentation_pipeline.apply_feature_transform(
padded_audios[i])
return utts, padded_audios, audio_lens, padded_texts, text_lens

Loading…
Cancel
Save