diff --git a/deepspeech/frontend/audio.py b/deepspeech/frontend/audio.py index 4488f5f2e..ffdcd4b3a 100644 --- a/deepspeech/frontend/audio.py +++ b/deepspeech/frontend/audio.py @@ -351,7 +351,9 @@ class AudioSegment(object): tfm.set_globals(multithread=False) tfm.speed(speed_rate) self._samples = tfm.build_array( - input_array=self._samples, sample_rate_in=self._sample_rate).copy() + input_array=self._samples, + sample_rate_in=self._sample_rate).squeeze(-1).astype( + np.float32).copy() def normalize(self, target_db=-20, max_gain_db=300.0): """Normalize audio to be of the desired RMS value in decibels. diff --git a/examples/librispeech/s0/conf/augmentation.json b/examples/librispeech/s0/conf/augmentation.json index a1a759e67..5635d9c84 100644 --- a/examples/librispeech/s0/conf/augmentation.json +++ b/examples/librispeech/s0/conf/augmentation.json @@ -1,4 +1,13 @@ [ + { + "type": "speed", + "params": { + "min_speed_rate": 0.9, + "max_speed_rate": 1.1, + "num_rates": 3 + }, + "prob": 0.0 + }, { "type": "shift", "params": { diff --git a/examples/tiny/s1/conf/augmentation.json b/examples/tiny/s1/conf/augmentation.json index 1987ad424..f26c282e7 100644 --- a/examples/tiny/s1/conf/augmentation.json +++ b/examples/tiny/s1/conf/augmentation.json @@ -6,7 +6,7 @@ "max_speed_rate": 1.1, "num_rates": 3 }, - "prob": 0.0 + "prob": 1.0 }, { "type": "shift",