PaddleSpeech/data/librispeech/librispeech.py

"""Prepare Librispeech ASR datasets.

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import distutils.util
import os
import sys
import tarfile
import argparse
import soundfile
import json
import codecs
from paddle.v2.dataset.common import md5file

# Base cache directory under the user's home; the default --target_dir is
# derived from it.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

# Download locations of the seven LibriSpeech archives, all served from the
# same URL root.
URL_ROOT = "http://www.openslr.org/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
URL_DEV_OTHER = URL_ROOT + "/dev-other.tar.gz"
URL_TRAIN_CLEAN_100 = URL_ROOT + "/train-clean-100.tar.gz"
URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz"
URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz"

# Expected MD5 hex digests of the archives above; each one is passed together
# with its URL to prepare_dataset() and compared against the downloaded file.
MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9"
MD5_TEST_OTHER = "fb5a50374b501bb3bac4815ee91d3135"
MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"
MD5_DEV_OTHER = "c8d0bcc9cca99d4f8b62fcc847357931"
MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522"
MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa"
MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"

# Command-line interface of the script.
# NOTE: the arguments are parsed at import time (see `args` below), so
# importing this module from another program consumes that program's sys.argv.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/Libri",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# The string default is run through the `type` converter by argparse, so
# args.full_download holds the converted truth value rather than "True".
parser.add_argument(
    "--full_download",
    default="True",
    type=distutils.util.strtobool,
    help="Download all datasets for Librispeech."
    " If False, only download a minimal requirement (test-clean, dev-clean,"
    " train-clean-100). (default: %(default)s)")
args = parser.parse_args()


def download(url, md5sum, target_dir):
    """
    Download file from url to target_dir, and check md5sum.

    The file is fetched with the external `wget` tool (with `-c`, so a
    partial download is resumed). Nothing is downloaded when a file with the
    expected checksum is already present in target_dir.

    :param url: URL of the file; its last path component is used as the
                local file name.
    :param md5sum: Expected MD5 hex digest of the file.
    :param target_dir: Directory to store the file in (created if missing).
    :return: Path of the local file.
    :raises RuntimeError: If wget cannot be run, exits with a non-zero
                          status, or the checksum does not match.
    """
    # Imported locally so this function does not rely on a file-level import.
    import subprocess

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, url.split("/")[-1])

    if os.path.exists(filepath) and md5file(filepath) == md5sum:
        print("File exists, skip downloading. (%s)" % filepath)
        return filepath

    print("Downloading %s ..." % url)
    # Use an argument list instead of a shell string so that URLs or
    # directories containing spaces or shell metacharacters are passed to
    # wget literally.
    try:
        returncode = subprocess.call(["wget", "-c", url, "-P", target_dir])
    except OSError as err:
        raise RuntimeError("Failed to run wget: %s" % err)
    # Report a failed download directly instead of letting it surface later
    # as an unrelated error while checksumming a missing or partial file.
    if returncode != 0:
        raise RuntimeError("Download failed (wget exit code %d): %s" %
                           (returncode, url))

    print("\nMD5 Checksum %s ..." % filepath)
    if md5file(filepath) != md5sum:
        raise RuntimeError("MD5 checksum failed.")
    return filepath


def unpack(filepath, target_dir):
    """
    Unpack the file to the target_dir.

    :param filepath: Path of the tar archive to extract (compression is
                     detected by tarfile).
    :param target_dir: Directory the archive members are extracted into.
    """
    print("Unpacking %s ..." % filepath)
    # The context manager closes the archive even when extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use it on archives from a trusted, verified source.
    with tarfile.open(filepath) as tar:
        tar.extractall(target_dir)


def create_manifest(data_dir, manifest_path):
    """
    Create a manifest json file summarizing the data set, with each line
    containing the meta data (i.e. audio filepath, transcription text, audio
    duration) of each audio file within the data set.

    Every directory below data_dir that contains a file ending in
    'trans.txt' is processed. Each non-empty transcript line is split on
    whitespace: the first token names the utterance (its audio is expected
    next to the transcript as '<token>.flac') and the remaining tokens form
    the lower-cased text.

    :param data_dir: Root directory to search for transcripts and audio.
    :param manifest_path: Path of the manifest file to write (overwritten).
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        text_filelist = [
            filename for filename in filelist if filename.endswith('trans.txt')
        ]
        if not text_filelist:
            continue
        # os.walk() already yields `subfolder` prefixed with data_dir, so it
        # must not be joined with data_dir again (that would duplicate the
        # prefix whenever data_dir is a relative path).
        text_filepath = os.path.join(subfolder, text_filelist[0])
        with open(text_filepath) as text_file:
            for line in text_file:
                segments = line.strip().split()
                if not segments:
                    # Skip blank lines instead of failing on segments[0].
                    continue
                text = ' '.join(segments[1:]).lower()
                audio_filepath = os.path.join(subfolder,
                                              segments[0] + '.flac')
                # Only the header metadata is needed for the duration, so
                # avoid decoding the whole audio file.
                audio_info = soundfile.info(audio_filepath)
                duration = float(audio_info.frames) / audio_info.samplerate
                json_lines.append(
                    json.dumps({
                        'audio_filepath': audio_filepath,
                        'duration': duration,
                        'text': text
                    }))
    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')


def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """
    Download, unpack and create summary manifest file.

    Downloading and unpacking are skipped when target_dir already contains a
    "LibriSpeech" entry; the manifest is (re)created in either case.

    :param url: URL of the archive to download.
    :param md5sum: Expected MD5 hex digest of the archive.
    :param target_dir: Directory holding the archive and its unpacked data.
    :param manifest_path: Path of the manifest file to write.
    """
    unpacked_root = os.path.join(target_dir, "LibriSpeech")
    if os.path.exists(unpacked_root):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        # Fetch the archive first, then extract it next to itself.
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
    # The manifest is rebuilt on every run, even when the data was reused.
    create_manifest(target_dir, manifest_path)


def main():
    """
    Prepare the LibriSpeech subsets selected on the command line.

    test-clean, dev-clean and train-clean-100 are always prepared; the four
    remaining subsets are added when --full_download is true. Each subset is
    stored in its own sub-directory of --target_dir and gets a manifest named
    '<manifest_prefix>.<subset>'.
    """
    # (subset name, archive URL, expected MD5), in processing order.
    subsets = [
        ("test-clean", URL_TEST_CLEAN, MD5_TEST_CLEAN),
        ("dev-clean", URL_DEV_CLEAN, MD5_DEV_CLEAN),
        ("train-clean-100", URL_TRAIN_CLEAN_100, MD5_TRAIN_CLEAN_100),
    ]
    if args.full_download:
        subsets.extend([
            ("test-other", URL_TEST_OTHER, MD5_TEST_OTHER),
            ("dev-other", URL_DEV_OTHER, MD5_DEV_OTHER),
            ("train-clean-360", URL_TRAIN_CLEAN_360, MD5_TRAIN_CLEAN_360),
            ("train-other-500", URL_TRAIN_OTHER_500, MD5_TRAIN_OTHER_500),
        ])

    for subset_name, subset_url, subset_md5 in subsets:
        prepare_dataset(
            url=subset_url,
            md5sum=subset_md5,
            target_dir=os.path.join(args.target_dir, subset_name),
            manifest_path=args.manifest_prefix + "." + subset_name)


if __name__ == '__main__':
    main()
Add function, class and module docs for data parts in DS2. 7 years ago			`"""Prepare Librispeech ASR datasets.`
Add function docs. 8 years ago
Add function, class and module docs for data parts in DS2. 7 years ago			`Download, unpack and create manifest files.`
			`Manifest file is a json-format file with each line containing the`
			`meta data (i.e. audio filepath, transcript and audio duration)`
			`of each audio file in the data set.`
Add function docs. 8 years ago			`"""`
Add function, class and module docs for data parts in DS2. 7 years ago			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`
Add function docs. 8 years ago
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`import distutils.util`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`import os`
change the wget method in run.sh of deep_speech2 7 years ago			`import sys`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`import tarfile`
			`import argparse`
			`import soundfile`
			`import json`
Unify encoding to 'utf-8' and optimize error rate calculation. 7 years ago			`import codecs`
Add more test cases and make DP more clear. 7 years ago			`from paddle.v2.dataset.common import md5file`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago
Update DS2 README.md and fix bug in librispeech.py 8 years ago			`DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`URL_ROOT = "http://www.openslr.org/resources/12"`
			`URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"`
			`URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"`
			`URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"`
			`URL_DEV_OTHER = URL_ROOT + "/dev-other.tar.gz"`
			`URL_TRAIN_CLEAN_100 = URL_ROOT + "/train-clean-100.tar.gz"`
			`URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz"`
			`URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz"`

			`MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9"`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`MD5_TEST_OTHER = "fb5a50374b501bb3bac4815ee91d3135"`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`MD5_DEV_OTHER = "c8d0bcc9cca99d4f8b62fcc847357931"`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522"`
			`MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa"`
Update DS2 README.md and fix bug in librispeech.py 8 years ago			`MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago
Add shuffle type of instance_shuffle and batch_shuffle_clipped. 7 years ago			`parser = argparse.ArgumentParser(description=__doc__)`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`parser.add_argument(`
			`"--target_dir",`
			`default=DATA_HOME + "/Libri",`
			`type=str,`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`help="Directory to save the dataset. (default: %(default)s)")`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`parser.add_argument(`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`"--manifest_prefix",`
Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. 7 years ago			`default="manifest",`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`type=str,`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`help="Filepath prefix for output manifests. (default: %(default)s)")`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`parser.add_argument(`
			`"--full_download",`
			`default="True",`
			`type=distutils.util.strtobool,`
			`help="Download all datasets for Librispeech."`
			`" If False, only download a minimal requirement (test-clean, dev-clean"`
			`" train-clean-100). (default: %(default)s)")`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`args = parser.parse_args()`


1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`def download(url, md5sum, target_dir):`
			`"""`
			`Download file from url to target_dir, and check md5sum.`
			`"""`
			`if not os.path.exists(target_dir): os.makedirs(target_dir)`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`filepath = os.path.join(target_dir, url.split("/")[-1])`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`if not (os.path.exists(filepath) and md5file(filepath) == md5sum):`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`print("Downloading %s ..." % url)`
change the wget method in run.sh of deep_speech2 7 years ago			`os.system("wget -c " + url + " -P " + target_dir)`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`print("\nMD5 Chesksum %s ..." % filepath)`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`if not md5file(filepath) == md5sum:`
			`raise RuntimeError("MD5 checksum failed.")`
			`else:`
			`print("File exists, skip downloading. (%s)" % filepath)`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`return filepath`


			`def unpack(filepath, target_dir):`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`"""`
			`Unpack the file to the target_dir.`
			`"""`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`print("Unpacking %s ..." % filepath)`
			`tar = tarfile.open(filepath)`
			`tar.extractall(target_dir)`
			`tar.close()`


			`def create_manifest(data_dir, manifest_path):`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`"""`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`Create a manifest json file summarizing the data set, with each line`
			`containing the meta data (i.e. audio filepath, transcription text, audio`
			`duration) of each audio file within the data set.`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`"""`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`print("Creating manifest %s ..." % manifest_path)`
			`json_lines = []`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`for subfolder, _, filelist in sorted(os.walk(data_dir)):`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`text_filelist = [`
			`filename for filename in filelist if filename.endswith('trans.txt')`
			`]`
			`if len(text_filelist) > 0:`
			`text_filepath = os.path.join(data_dir, subfolder, text_filelist[0])`
			`for line in open(text_filepath):`
			`segments = line.strip().split()`
			`text = ' '.join(segments[1:]).lower()`
			`audio_filepath = os.path.join(data_dir, subfolder,`
			`segments[0] + '.flac')`
			`audio_data, samplerate = soundfile.read(audio_filepath)`
			`duration = float(len(audio_data)) / samplerate`
			`json_lines.append(`
			`json.dumps({`
			`'audio_filepath': audio_filepath,`
			`'duration': duration,`
			`'text': text`
			`}))`
Unify encoding to 'utf-8' and optimize error rate calculation. 7 years ago			`with codecs.open(manifest_path, 'w', 'utf-8') as out_file:`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`for line in json_lines:`
			`out_file.write(line + '\n')`


Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`def prepare_dataset(url, md5sum, target_dir, manifest_path):`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`"""`
			`Download, unpack and create summmary manifest file.`
			`"""`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`# download`
			`filepath = download(url, md5sum, target_dir)`
			`# unpack`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`unpack(filepath, target_dir)`
			`else:`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`print("Skip downloading and unpacking. Data already exists in %s." %`
			`target_dir)`
			`# create manifest json file`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`create_manifest(target_dir, manifest_path)`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago

			`def main():`
			`prepare_dataset(`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`url=URL_TEST_CLEAN,`
			`md5sum=MD5_TEST_CLEAN,`
			`target_dir=os.path.join(args.target_dir, "test-clean"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".test-clean")`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago			`prepare_dataset(`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`url=URL_DEV_CLEAN,`
			`md5sum=MD5_DEV_CLEAN,`
			`target_dir=os.path.join(args.target_dir, "dev-clean"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".dev-clean")`
Add function docs. 8 years ago			`prepare_dataset(`
1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. 8 years ago			`url=URL_TRAIN_CLEAN_100,`
			`md5sum=MD5_TRAIN_CLEAN_100,`
			`target_dir=os.path.join(args.target_dir, "train-clean-100"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".train-clean-100")`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`if args.full_download:`
			`prepare_dataset(`
			`url=URL_TEST_OTHER,`
			`md5sum=MD5_TEST_OTHER,`
			`target_dir=os.path.join(args.target_dir, "test-other"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".test-other")`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`prepare_dataset(`
			`url=URL_DEV_OTHER,`
			`md5sum=MD5_DEV_OTHER,`
			`target_dir=os.path.join(args.target_dir, "dev-other"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".dev-other")`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`prepare_dataset(`
			`url=URL_TRAIN_CLEAN_360,`
			`md5sum=MD5_TRAIN_CLEAN_360,`
			`target_dir=os.path.join(args.target_dir, "train-clean-360"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".train-clean-360")`
Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). 8 years ago			`prepare_dataset(`
			`url=URL_TRAIN_OTHER_500,`
			`md5sum=MD5_TRAIN_OTHER_500,`
			`target_dir=os.path.join(args.target_dir, "train-other-500"),`
Remove manifest's line number check from librispeech.py and update README.md. 7 years ago			`manifest_path=args.manifest_prefix + ".train-other-500")`
Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. 8 years ago

			`if __name__ == '__main__':`
			`main()`