Add NoisePerturbAugmentor and CHiME3 data preparation.

9 years ago · ad82c87712
parent 961f6a2963
commit ad82c87712
6 changed files with 187 additions and 0 deletions
--- a/data_utils/augmentor/augmentation.py
+++ b/data_utils/augmentor/augmentation.py
@ -8,6 +8,7 @@ import random
 from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
 from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor
 from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
 from data_utils.augmentor.noise_perturb import NoisePerturbAugmentor
 from data_utils.augmentor.resample import ResampleAugmentor
 from data_utils.augmentor.online_bayesian_normalization import \
     OnlineBayesianNormalizationAugmentor
@ -89,5 +90,7 @@ class AugmentationPipeline(object):
            return ResampleAugmentor(self._rng, **params)
        elif augmentor_type == "bayesian_normal":
            return OnlineBayesianNormalizationAugmentor(self._rng, **params)
        elif augmentor_type == "noise":
            return NoisePerturbAugmentor(self._rng, **params)
        else:
            raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
--- a/data_utils/augmentor/noise_perturb.py
+++ b/data_utils/augmentor/noise_perturb.py
@ -0,0 +1,47 @@
 """Contains the noise perturb augmentation model."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from data_utils.augmentor.base import AugmentorBase
 from data_utils import utils
 from data_utils.speech import SpeechSegment
 class NoisePerturbAugmentor(AugmentorBase):
    """Augmentation model for adding background noise.
    :param rng: Random generator object.
    :type rng: random.Random
    :param min_snr_dB: Minimal signal noise ratio, in decibels.
    :type min_snr_dB: float
    :param max_snr_dB: Maximal signal noise ratio, in decibels.
    :type max_snr_dB: float
    """
    def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest):
        self._min_snr_dB = min_snr_dB
        self._max_snr_dB = max_snr_dB
        self._rng = rng
        self._manifest = utils.read_manifest(manifest_path=noise_manifest)
    def transform_audio(self, audio_segment):
        """Add background noise audio.
        Note that this is an in-place transformation.
        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegmenet|SpeechSegment
        """
        noise_json = self._rng.sample(self._manifest, 1)[0]
        if noise_json['duration'] < audio_segment.duration:
            raise RuntimeError("The duration of sampled noise audio is smaller "
                               "than the audio segment to add effects to.")
        diff_duration = noise_json['duration'] - audio_segment.duration
        start = self._rng.uniform(0, diff_duration)
        end = start + audio_segment.duration
        noise_segment = SpeechSegment.slice_from_file(
            noise_json['audio_filepath'], transcript="", start=start, end=end)
        snr_dB = self._rng.uniform(self._min_snr_dB, self._max_snr_dB)
        audio_segment.add_noise(
            noise_segment, snr_dB, allow_downsampling=True, rng=self._rng)
--- a/data_utils/augmentor/online_bayesian_normalization.py
+++ b/data_utils/augmentor/online_bayesian_normalization.py
--- a/data_utils/augmentor/resample.py
+++ b/data_utils/augmentor/resample.py
--- a/datasets/noise/chime3_background.py
+++ b/datasets/noise/chime3_background.py
@ -0,0 +1,128 @@
 """Prepare CHiME3 background data.
 Download, unpack and create manifest files.
 Manifest file is a json-format file with each line containing the
 meta data (i.e. audio filepath, transcript and audio duration)
 of each audio file in the data set.
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import distutils.util
 import os
 import wget
 import zipfile
 import argparse
 import soundfile
 import json
 from paddle.v2.dataset.common import md5file
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ"
 MD5 = "c3ff512618d7a67d4f85566ea1bc39ec"
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/chime3_background",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
 parser.add_argument(
    "--manifest_filepath",
    default="manifest.chime3.background",
    type=str,
    help="Filepath for output manifests. (default: %(default)s)")
 args = parser.parse_args()
 def download(url, md5sum, target_dir, filename=None):
    """Download file from url to target_dir, and check md5sum."""
    if filename == None:
        filename = url.split("/")[-1]
    if not os.path.exists(target_dir): os.makedirs(target_dir)
    filepath = os.path.join(target_dir, filename)
    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
        print("Downloading %s ..." % url)
        wget.download(url, target_dir)
        print("\nMD5 Chesksum %s ..." % filepath)
        if not md5file(filepath) == md5sum:
            raise RuntimeError("MD5 checksum failed.")
    else:
        print("File exists, skip downloading. (%s)" % filepath)
    return filepath
 def unpack(filepath, target_dir):
    """Unpack the file to the target_dir."""
    print("Unpacking %s ..." % filepath)
    if filepath.endswith('.zip'):
        zip = zipfile.ZipFile(filepath, 'r')
        zip.extractall(target_dir)
        zip.close()
    elif filepath.endswith('.tar') or filepath.endswith('.tar.gz'):
        tar = zipfile.open(filepath)
        tar.extractall(target_dir)
        tar.close()
    else:
        raise ValueError("File format is not supported for unpacking.")
 def create_manifest(data_dir, manifest_path):
    """Create a manifest json file summarizing the data set, with each line
    containing the meta data (i.e. audio filepath, transcription text, audio
    duration) of each audio file within the data set.
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        for filename in filelist:
            if filename.endswith('.wav'):
                filepath = os.path.join(data_dir, subfolder, filename)
                audio_data, samplerate = soundfile.read(filepath)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps({
                        'audio_filepath': filepath,
                        'duration': duration,
                        'text': ''
                    }))
    with open(manifest_path, 'w') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')
 def prepare_chime3(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create summmary manifest file."""
    if not os.path.exists(os.path.join(target_dir, "CHiME3")):
        # download
        filepath = download(url, md5sum, target_dir,
                            "myairbridge-AG0Y3DNBE5IWRRTV.zip")
        # unpack
        unpack(filepath, target_dir)
        unpack(
            os.path.join(target_dir, 'CHiME3_background_bus.zip'), target_dir)
        unpack(
            os.path.join(target_dir, 'CHiME3_background_caf.zip'), target_dir)
        unpack(
            os.path.join(target_dir, 'CHiME3_background_ped.zip'), target_dir)
        unpack(
            os.path.join(target_dir, 'CHiME3_background_str.zip'), target_dir)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)
 def main():
    prepare_chime3(
        url=URL,
        md5sum=MD5,
        target_dir=args.target_dir,
        manifest_path=args.manifest_filepath)
 if __name__ == '__main__':
    main()
--- a/datasets/run_all.sh
+++ b/datasets/run_all.sh
@ -6,8 +6,17 @@ if [ $? -ne 0 ]; then
 fi
 cd -
 cd noise 
 python chime3_background.py
 if [ $? -ne 0 ]; then
    echo "Prepare CHiME3 background noise failed. Terminated."
    exit 1
 fi
 cd -
 cat librispeech/manifest.train* | shuf > manifest.train
 cat librispeech/manifest.dev-clean > manifest.dev
 cat librispeech/manifest.test-clean > manifest.test
 cat noise/manifest.* > manifest.noise
 echo "All done."