fix

4 years ago · aaeef54f46
parent 82ca0f6549
commit aaeef54f46
6 changed files with 28 additions and 33 deletions
--- a/deepspeech/frontend/augmentor/shift_perturb.py
+++ b/deepspeech/frontend/augmentor/shift_perturb.py
@@ -32,7 +32,8 @@ class ShiftPerturbAugmentor(AugmentorBase):
        self._rng = rng

    def randomize_parameters(self):
-        self.shift_ms = self._rng.uniform(self._min_shift_ms, self._max_shift_ms)
+        self.shift_ms = self._rng.uniform(self._min_shift_ms,
+                                          self._max_shift_ms)

    def apply(self, audio_segment):
        audio_segment.shift(self.shift_ms)
@@ -49,7 +50,6 @@ class ShiftPerturbAugmentor(AugmentorBase):
    #         self.randomize_parameters()
    #     self.apply(audio_segment)

-
    # def transform_audio(self, audio_segment):
    #     """Shift audio.

@@ -60,5 +60,3 @@ class ShiftPerturbAugmentor(AugmentorBase):
    #     """
    #     shift_ms = self._rng.uniform(self._min_shift_ms, self._max_shift_ms)
    #     audio_segment.shift(shift_ms)
-
-
--- a/deepspeech/frontend/augmentor/spec_augment.py
+++ b/deepspeech/frontend/augmentor/spec_augment.py
@@ -174,7 +174,6 @@ class SpecAugmentor(AugmentorBase):
            assert t_0 <= t_0 + t
        return xs

-
    # def mask_freq(self, xs, replace_with_zero=False):
    #     n_bins = xs.shape[0]
    #     for i in range(0, self.n_freq_masks):
@@ -208,7 +207,6 @@ class SpecAugmentor(AugmentorBase):
    #         self._time_mask = (t_0, t_0 + t)
    #     return xs

-
    # def transform_feature(self, xs: np.ndarray, single=True):
    #     """
    #     Args:
--- a/deepspeech/frontend/augmentor/speed_perturb.py
+++ b/deepspeech/frontend/augmentor/speed_perturb.py
@@ -79,7 +79,6 @@ class SpeedPerturbAugmentor(AugmentorBase):
            self._rates = np.linspace(
                self._min_rate, self._max_rate, self._num_rates, endpoint=True)

-
    def randomize_parameters(self):
        if self._num_rates < 0:
            self.speed_rate = self._rng.uniform(self._min_rate, self._max_rate)
--- a/deepspeech/io/collator.py
+++ b/deepspeech/io/collator.py
@@ -174,7 +174,6 @@ class SpeechCollator():
        self._stride_ms = stride_ms
        self._target_sample_rate = target_sample_rate

-
        self._speech_featurizer = SpeechFeaturizer(
            unit_type=unit_type,
            vocab_filepath=vocab_filepath,
@@ -231,7 +230,8 @@ class SpeechCollator():
        self._augmentation_pipeline.randomize_parameters_audio_transform()

    def randomize_feature_parameters(self, n_frames, n_bins):
-        self._augmentation_pipeline.randomize_parameters_feature_transform(n_frames, n_bins)
+        self._augmentation_pipeline.randomize_parameters_feature_transform(
+            n_frames, n_bins)

    def process_feature_and_transform(self, audio_file, transcript):
        """Load, augment, featurize and normalize for speech data.
@@ -261,7 +261,6 @@ class SpeechCollator():

        return specgram, transcript_part

-
    # def process_utterance(self, audio_file, transcript, single=True):
    #     """Load, augment, featurize and normalize for speech data.

@@ -282,7 +281,6 @@ class SpeechCollator():
    #     # audio augment
    #     self._augmentation_pipeline.transform_audio(speech_segment)

-
    #     # Spectrum transform
    #     specgram, transcript_part = self._speech_featurizer.featurize(
    #         speech_segment, self._keep_transcription_text)
@@ -357,7 +355,9 @@ class SpeechCollator():
        for i in range(len(padded_audios)):
            if not self._randomize_each_batch:
                self.randomize_feature_parameters(audio_lens[i], n_bins)
-            padded_audios[i] = self._augmentation_pipeline.apply_feature_transform(padded_audios[i])
+            padded_audios[
+                i] = self._augmentation_pipeline.apply_feature_transform(
+                    padded_audios[i])

        return utts, padded_audios, audio_lens, padded_texts, text_lens