# PaddleSpeech/deepspeech/frontend/augmentor/augmentation.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the data augmentation pipeline."""

import json
import random
from deepspeech.frontend.augmentor.volume_perturb import VolumePerturbAugmentor
from deepspeech.frontend.augmentor.shift_perturb import ShiftPerturbAugmentor
from deepspeech.frontend.augmentor.speed_perturb import SpeedPerturbAugmentor
from deepspeech.frontend.augmentor.noise_perturb import NoisePerturbAugmentor
from deepspeech.frontend.augmentor.impulse_response import ImpulseResponseAugmentor
from deepspeech.frontend.augmentor.resample import ResampleAugmentor
from deepspeech.frontend.augmentor.online_bayesian_normalization import \
     OnlineBayesianNormalizationAugmentor


class AugmentationPipeline:
    """Build a pre-processing pipeline from various augmentation models.

    Such a data augmentation pipeline is often leveraged to augment the
    training samples so that the model becomes invariant to certain types of
    perturbations in the real world, improving its generalization ability.

    The pipeline is built according to the augmentation configuration given
    as a json string, e.g.

    .. code-block::

        [ {
                "type": "noise",
                "params": {"min_snr_dB": 10,
                           "max_snr_dB": 20,
                           "noise_manifest_path": "datasets/manifest.noise"},
                "prob": 0.0
            },
            {
                "type": "speed",
                "params": {"min_speed_rate": 0.9,
                           "max_speed_rate": 1.1},
                "prob": 1.0
            },
            {
                "type": "shift",
                "params": {"min_shift_ms": -5,
                           "max_shift_ms": 5},
                "prob": 1.0
            },
            {
                "type": "volume",
                "params": {"min_gain_dBFS": -10,
                           "max_gain_dBFS": 10},
                "prob": 0.0
            },
            {
                "type": "bayesian_normal",
                "params": {"target_db": -20,
                           "prior_db": -20,
                           "prior_samples": 100},
                "prob": 0.0
            }
        ]

    This augmentation configuration declares five augmentation models (noise,
    speed, shift, volume and bayesian_normal). Every declared model is
    instantiated, in the listed order, when the pipeline is built. "prob"
    indicates the probability of the augmentor taking effect on a given audio
    segment: with the values above only the speed and shift augmentors are
    ever applied, because an augmentor whose "prob" is zero never takes
    effect.

    Supported "type" values are "volume", "shift", "speed", "resample",
    "bayesian_normal", "noise" and "impulse".

    :param augmentation_config: Augmentation configuration in json string.
    :type augmentation_config: str
    :param random_seed: Random seed.
    :type random_seed: int
    :raises ValueError: If the augmentation json config is in incorrect format.
    """

    def __init__(self, augmentation_config, random_seed=0):
        # The same RNG instance drives the per-segment "apply or skip"
        # decision and is handed to every augmentor that gets constructed.
        self._rng = random.Random(random_seed)
        self._augmentors, self._rates = self._parse_pipeline_from(
            augmentation_config)

    def transform_audio(self, audio_segment):
        """Run the pre-processing pipeline for data augmentation.

        Augmentors are visited in configuration order; each one is applied
        only when a fresh uniform draw from [0, 1) falls below its "prob".

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to process.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        for augmentor, rate in zip(self._augmentors, self._rates):
            if self._rng.uniform(0., 1.) < rate:
                augmentor.transform_audio(audio_segment)

    def _parse_pipeline_from(self, config_json):
        """Parse the config json to build an augmentation pipeline.

        :param config_json: Json string holding a list of augmentor entries,
                            each with "type", "params" and "prob" keys.
        :type config_json: str
        :return: Tuple of (augmentors, rates), two lists of equal length in
                 configuration order.
        :rtype: tuple
        :raises ValueError: If the json cannot be decoded, an entry is missing
                            a key, "prob" is not a number, the type is unknown
                            or an augmentor cannot be constructed. The
                            underlying error is attached as ``__cause__``.
        """
        try:
            configs = json.loads(config_json)
            augmentors = []
            rates = []
            for config in configs:
                rate = config["prob"]
                # Reject a non-numeric probability here; otherwise it would
                # only surface as a TypeError on the first transform_audio().
                if not isinstance(rate, (int, float)):
                    raise ValueError(
                        'Augmentor "prob" must be a number, got %r.' % (rate, ))
                augmentors.append(
                    self._get_augmentor(config["type"], config["params"]))
                rates.append(rate)
        except Exception as e:
            # Any failure is reported as the single documented error type,
            # chained explicitly so the root cause stays in the traceback.
            raise ValueError("Failed to parse the augmentation config json: "
                             "%s" % str(e)) from e
        return augmentors, rates

    def _get_augmentor(self, augmentor_type, params):
        """Return an augmentation model by the type name, and pass in params.

        The augmentor is constructed with the pipeline's RNG as its first
        argument, followed by ``params`` expanded as keyword arguments.

        :param augmentor_type: Type name from the config's "type" field.
        :type augmentor_type: str
        :param params: Keyword arguments for the augmentor constructor.
        :type params: dict
        :return: The constructed augmentor instance.
        :raises ValueError: If the type name is not recognized.
        """
        if augmentor_type == "volume":
            return VolumePerturbAugmentor(self._rng, **params)
        elif augmentor_type == "shift":
            return ShiftPerturbAugmentor(self._rng, **params)
        elif augmentor_type == "speed":
            return SpeedPerturbAugmentor(self._rng, **params)
        elif augmentor_type == "resample":
            return ResampleAugmentor(self._rng, **params)
        elif augmentor_type == "bayesian_normal":
            return OnlineBayesianNormalizationAugmentor(self._rng, **params)
        elif augmentor_type == "noise":
            return NoisePerturbAugmentor(self._rng, **params)
        elif augmentor_type == "impulse":
            return ImpulseResponseAugmentor(self._rng, **params)
        else:
            raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
add copyright 4 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Contains the data augmentation pipeline."""`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago
			`import json`
			`import random`
Support paddle 2.x (#538) * 2.x model * model test pass * fix data * fix soundfile with flac support * one thread dataloader test pass * export feasture size add trainer and utils add setup model and dataloader update travis using Bionic dist * add venv; test under venv * fix unittest; train and valid * add train and config * add config and train script * fix ctc cuda memcopy error * fix imports * fix train valid log * fix dataset batch shuffle shift start from 1 fix rank_zero_only decreator error close tensorboard when train over add decoding config and code * test process can run * test with decoding * test and infer with decoding * fix infer * fix ctc loss lr schedule sortagrad logger * aishell egs * refactor train add aishell egs * fix dataset batch shuffle and add batch sampler log print model parameter * fix model and ctc * sequence_mask make all inputs zeros, which cause grad be zero, this is a bug of LessThanOp add grad clip by global norm add model train test notebook * ctc loss remove run prefix using ord value as text id * using unk when training compute_loss need text ids ord id using in test mode, which compute wer/cer * fix tester * add lr_deacy refactor code * fix tools * fix ci add tune fix gru model bugs add dataset and model test * fix decoding * refactor repo fix decoding * fix musan and rir dataset * refactor io, loss, conv, rnn, gradclip, model, utils * fix ci and import * refactor model add export jit model * add deploy bin and test it * rm uselss egs * add layer tools * refactor socket server new model from pretrain * remve useless * fix instability loss and grad nan or inf for librispeech training * fix sampler * fix libri train.sh * fix doc * add license on cpp * fix doc * fix libri script * fix install * clip 5 wer 7.39, clip 400 wer 7.54, 1.8 clip 400 baseline 7.49 4 years ago			`from deepspeech.frontend.augmentor.volume_perturb import VolumePerturbAugmentor`
			`from deepspeech.frontend.augmentor.shift_perturb import ShiftPerturbAugmentor`
			`from deepspeech.frontend.augmentor.speed_perturb import SpeedPerturbAugmentor`
			`from deepspeech.frontend.augmentor.noise_perturb import NoisePerturbAugmentor`
			`from deepspeech.frontend.augmentor.impulse_response import ImpulseResponseAugmentor`
			`from deepspeech.frontend.augmentor.resample import ResampleAugmentor`
			`from deepspeech.frontend.augmentor.online_bayesian_normalization import \`
modify some detail of augmentor 7 years ago			`OnlineBayesianNormalizationAugmentor`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago

Support paddle 2.x (#538) * 2.x model * model test pass * fix data * fix soundfile with flac support * one thread dataloader test pass * export feasture size add trainer and utils add setup model and dataloader update travis using Bionic dist * add venv; test under venv * fix unittest; train and valid * add train and config * add config and train script * fix ctc cuda memcopy error * fix imports * fix train valid log * fix dataset batch shuffle shift start from 1 fix rank_zero_only decreator error close tensorboard when train over add decoding config and code * test process can run * test with decoding * test and infer with decoding * fix infer * fix ctc loss lr schedule sortagrad logger * aishell egs * refactor train add aishell egs * fix dataset batch shuffle and add batch sampler log print model parameter * fix model and ctc * sequence_mask make all inputs zeros, which cause grad be zero, this is a bug of LessThanOp add grad clip by global norm add model train test notebook * ctc loss remove run prefix using ord value as text id * using unk when training compute_loss need text ids ord id using in test mode, which compute wer/cer * fix tester * add lr_deacy refactor code * fix tools * fix ci add tune fix gru model bugs add dataset and model test * fix decoding * refactor repo fix decoding * fix musan and rir dataset * refactor io, loss, conv, rnn, gradclip, model, utils * fix ci and import * refactor model add export jit model * add deploy bin and test it * rm uselss egs * add layer tools * refactor socket server new model from pretrain * remve useless * fix instability loss and grad nan or inf for librispeech training * fix sampler * fix libri train.sh * fix doc * add license on cpp * fix doc * fix libri script * fix install * clip 5 wer 7.39, clip 400 wer 7.54, 1.8 clip 400 baseline 7.49 4 years ago			`class AugmentationPipeline():`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Build a pre-processing pipeline with various augmentation models.Such a`
			`data augmentation pipeline is oftern leveraged to augment the training`
			`samples to make the model invariant to certain types of perturbations in the`
			`real world, improving model's generalization ability.`

			`The pipeline is built according the the augmentation configuration in json`
			`string, e.g.`

			`.. code-block::`

Add ImpulseResponseAugmentor and augmentation.config file. 7 years ago			`[ {`
			`"type": "noise",`
			`"params": {"min_snr_dB": 10,`
			`"max_snr_dB": 20,`
Update noise and impulse augmentor according to code review. 7 years ago			`"noise_manifest_path": "datasets/manifest.noise"},`
Add ImpulseResponseAugmentor and augmentation.config file. 7 years ago			`"prob": 0.0`
			`},`
			`{`
			`"type": "speed",`
			`"params": {"min_speed_rate": 0.9,`
			`"max_speed_rate": 1.1},`
			`"prob": 1.0`
			`},`
			`{`
			`"type": "shift",`
			`"params": {"min_shift_ms": -5,`
			`"max_shift_ms": 5},`
			`"prob": 1.0`
			`},`
			`{`
			`"type": "volume",`
			`"params": {"min_gain_dBFS": -10,`
			`"max_gain_dBFS": 10},`
			`"prob": 0.0`
			`},`
			`{`
			`"type": "bayesian_normal",`
			`"params": {"target_db": -20,`
			`"prior_db": -20,`
			`"prior_samples": 100},`
			`"prob": 0.0`
			`}`
			`]`

Add function, class and module docs for data parts in DS2. 7 years ago			`This augmentation configuration inserts two augmentation models`
			`into the pipeline, with one is VolumePerturbAugmentor and the other`
			`SpeedPerturbAugmentor. "prob" indicates the probability of the current`
Add ImpulseResponseAugmentor and augmentation.config file. 7 years ago			`augmentor to take effect. If "prob" is zero, the augmentor does not take`
			`effect.`
Add function, class and module docs for data parts in DS2. 7 years ago
			`:param augmentation_config: Augmentation configuration in json string.`
			`:type augmentation_config: str`
			`:param random_seed: Random seed.`
			`:type random_seed: int`
			`:raises ValueError: If the augmentation json config is in incorrect format".`
			`"""`

Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`def __init__(self, augmentation_config, random_seed=0):`
			`self._rng = random.Random(random_seed)`
			`self._augmentors, self._rates = self._parse_pipeline_from(`
			`augmentation_config)`

			`def transform_audio(self, audio_segment):`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Run the pre-processing pipeline for data augmentation.`

			`Note that this is an in-place transformation.`

			`:param audio_segment: Audio segment to process.`
			`:type audio_segment: AudioSegmenet\|SpeechSegment`
			`"""`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`for augmentor, rate in zip(self._augmentors, self._rates):`
Add ImpulseResponseAugmentor and augmentation.config file. 7 years ago			`if self._rng.uniform(0., 1.) < rate:`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`augmentor.transform_audio(audio_segment)`

			`def _parse_pipeline_from(self, config_json):`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Parse the config json to build a augmentation pipelien."""`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`try:`
			`configs = json.loads(config_json)`
Add function, class and module docs for data parts in DS2. 7 years ago			`augmentors = [`
			`self._get_augmentor(config["type"], config["params"])`
			`for config in configs`
			`]`
			`rates = [config["prob"] for config in configs]`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`except Exception as e:`
Add function, class and module docs for data parts in DS2. 7 years ago			`raise ValueError("Failed to parse the augmentation config json: "`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`"%s" % str(e))`
			`return augmentors, rates`

			`def _get_augmentor(self, augmentor_type, params):`
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Return an augmentation model by the type name, and pass in params."""`
			`if augmentor_type == "volume":`
			`return VolumePerturbAugmentor(self._rng, **params)`
Improve audio featurizer and add shift augmentor. 1. Improve audio featurizer. 2. Add shift augmentor. 3. Update default argument to be the current best seggestion. 4. Add checkpoints with pass id. 7 years ago			`elif augmentor_type == "shift":`
			`return ShiftPerturbAugmentor(self._rng, **params)`
add 3 augmentor and unittest 7 years ago			`elif augmentor_type == "speed":`
add augmentor class 7 years ago			`return SpeedPerturbAugmentor(self._rng, **params)`
add 3 augmentor and unittest 7 years ago			`elif augmentor_type == "resample":`
add augmentor class 7 years ago			`return ResampleAugmentor(self._rng, **params)`
add 3 augmentor and unittest 7 years ago			`elif augmentor_type == "bayesian_normal":`
add augmentor class 7 years ago			`return OnlineBayesianNormalizationAugmentor(self._rng, **params)`
Add NoisePerturbAugmentor and CHiME3 data preparation. 7 years ago			`elif augmentor_type == "noise":`
			`return NoisePerturbAugmentor(self._rng, **params)`
Add ImpulseResponseAugmentor and augmentation.config file. 7 years ago			`elif augmentor_type == "impulse":`
			`return ImpulseResponseAugmentor(self._rng, **params)`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`else:`
			`raise ValueError("Unknown augmentor type [%s]." % augmentor_type)`