Rename some folders and update examples.

pull/2/head
Xinghai Sun 7 years ago
parent a00a436b52
commit ae7ef7929a

@@ -41,7 +41,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
     "--target_dir",
-    default=DATA_HOME + "/Libri",
+    default=DATA_HOME + "/libri",
     type=str,
     help="Directory to save the dataset. (default: %(default)s)")
 parser.add_argument(
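For context, the %(default)s placeholder in these help strings is expanded by argparse itself, so the renamed default surfaces in --help output automatically. A minimal standalone sketch of that behavior (the hard-coded path below is a stand-in for DATA_HOME + "/libri"):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--target_dir",
    default="~/.cache/paddle/dataset/speech/libri",  # stand-in for DATA_HOME + "/libri"
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
# format_help() renders the help text without exiting, unlike passing --help.
print(parser.format_help())  # ... (default: ~/.cache/paddle/dataset/speech/libri)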

@@ -0,0 +1,126 @@
"""Prepare Librispeech ASR datasets.
Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import os
import sys
import tarfile
import argparse
import soundfile
import json
import codecs
from paddle.v2.dataset.common import md5file
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = "http://www.openslr.org/resources/12"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/tiny",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def download(url, md5sum, target_dir):
    """
    Download file from url to target_dir, and check md5sum.
    """
    if not os.path.exists(target_dir): os.makedirs(target_dir)
    filepath = os.path.join(target_dir, url.split("/")[-1])
    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
        print("Downloading %s ..." % url)
        os.system("wget -c " + url + " -P " + target_dir)
        print("\nMD5 Checksum %s ..." % filepath)
        if not md5file(filepath) == md5sum:
            raise RuntimeError("MD5 checksum failed.")
    else:
        print("File exists, skip downloading. (%s)" % filepath)
    return filepath


def unpack(filepath, target_dir):
    """
    Unpack the file to the target_dir.
    """
    print("Unpacking %s ..." % filepath)
    tar = tarfile.open(filepath)
    tar.extractall(target_dir)
    tar.close()


def create_manifest(data_dir, manifest_path):
    """
    Create a manifest json file summarizing the data set, with each line
    containing the meta data (i.e. audio filepath, transcription text, audio
    duration) of each audio file within the data set.
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        text_filelist = [
            filename for filename in filelist if filename.endswith('trans.txt')
        ]
        if len(text_filelist) > 0:
            text_filepath = os.path.join(data_dir, subfolder, text_filelist[0])
            for line in open(text_filepath):
                segments = line.strip().split()
                text = ' '.join(segments[1:]).lower()
                audio_filepath = os.path.join(data_dir, subfolder,
                                              segments[0] + '.flac')
                audio_data, samplerate = soundfile.read(audio_filepath)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps({
                        'audio_filepath': audio_filepath,
                        'duration': duration,
                        'text': text
                    }))
    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')


def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """
    Download, unpack and create summary manifest file.
    """
    if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
        # download
        filepath = download(url, md5sum, target_dir)
        # unpack
        unpack(filepath, target_dir)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)


def main():
    prepare_dataset(
        url=URL_DEV_CLEAN,
        md5sum=MD5_DEV_CLEAN,
        target_dir=os.path.join(args.target_dir, "dev-clean"),
        manifest_path=args.manifest_prefix + ".dev-clean")


if __name__ == '__main__':
    main()
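Each line create_manifest() writes is a self-contained JSON object, which keeps the manifest streamable line by line. A sketch of reading one back (the path, duration, and text below are fabricated for illustration, not real dataset values):

import json

# Fabricated manifest line with the three fields create_manifest() emits.
sample_line = ('{"audio_filepath": "/path/to/LibriSpeech/dev-clean/'
               '1272-128104-0000.flac", "duration": 5.86, '
               '"text": "some lowercase transcript"}')

entry = json.loads(sample_line)
print(entry['audio_filepath'], entry['duration'], entry['text'])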

@@ -16,7 +16,7 @@ fi
 cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
-# build vocabulary (for English data, we can just skip this)
+# build vocabulary (can be skipped for English, as already provided)
 # python tools/build_vocab.py \
 # --count_threshold=0 \
 # --vocab_path='data/librispeech/eng_vocab.txt' \

@@ -1,39 +0,0 @@
#! /usr/bin/bash
pushd ../..
# download data, generate manifests
python data/librispeech/librispeech.py \
--manifest_prefix='data/librispeech/manifest' \
--full_download='True' \
--target_dir='~/.cache/paddle/dataset/speech/Libri'
if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi
cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train
# build vocabulary (for English data, we can just skip this)
# python tools/build_vocab.py \
# --count_threshold=0 \
# --vocab_path='data/librispeech/eng_vocab.txt' \
# --manifest_paths='data/librispeech/manifeset.train'
# compute mean and stddev for normalizer
python tools/compute_mean_std.py \
--manifest_path='data/librispeech/manifest.train' \
--num_samples=2000 \
--specgram_type='linear' \
--output_path='data/librispeech/mean_std.npz'
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
echo "LibriSpeech Data preparation done."

@@ -0,0 +1,45 @@
#! /usr/bin/bash
pushd ../..
# download data, generate manifests
python data/tiny/tiny.py \
--manifest_prefix='data/tiny/manifest' \
--target_dir=$HOME'/.cache/paddle/dataset/speech/tiny'
if [ $? -ne 0 ]; then
echo "Prepare LibriSpeech failed. Terminated."
exit 1
fi
cat data/tiny/manifest.dev-clean | head -n 32 > data/tiny/manifest.train
cat data/tiny/manifest.dev-clean | head -n 48 | tail -n 16 > data/tiny/manifest.dev
cat data/tiny/manifest.dev-clean | head -n 64 | tail -n 16 > data/tiny/manifest.test
# build vocabulary
python tools/build_vocab.py \
--count_threshold=0 \
--vocab_path='data/tiny/vocab.txt' \
--manifest_paths='data/tiny/manifest.train'
if [ $? -ne 0 ]; then
echo "Build vocabulary failed. Terminated."
exit 1
fi
# compute mean and stddev for normalizer
python tools/compute_mean_std.py \
--manifest_path='data/tiny/manifest.train' \
--num_samples=32 \
--specgram_type='linear' \
--output_path='data/tiny/mean_std.npz'
if [ $? -ne 0 ]; then
echo "Compute mean and stddev failed. Terminated."
exit 1
fi
echo "Tiny data preparation done."

@@ -4,7 +4,7 @@ pushd ../..
 CUDA_VISIBLE_DEVICES=0 \
 python -u infer.py \
---num_samples=10 \
+--num_samples=4 \
 --trainer_count=1 \
 --beam_size=500 \
 --num_proc_bsearch=12 \
@@ -17,11 +17,11 @@ python -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
---infer_manifest='data/librispeech/manifest.dev-clean' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
---model_path='checkpoints/params.latest.tar.gz' \
---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \
+--infer_manifest='data/tiny/manifest.train' \
+--mean_std_path='data/tiny/mean_std.npz' \
+--vocab_path='data/tiny/vocab.txt' \
+--model_path='checkpoints/params.pass-14.tar.gz' \
+--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
 --decoding_method='ctc_beam_search' \
 --error_rate_type='wer' \
 --specgram_type='linear'

@@ -2,17 +2,17 @@
 pushd ../..
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+CUDA_VISIBLE_DEVICES=0,1 \
 python -u train.py \
---batch_size=256 \
---trainer_count=8 \
---num_passes=50 \
---num_proc_data=12 \
+--batch_size=2 \
+--trainer_count=1 \
+--num_passes=10 \
+--num_proc_data=1 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
 --rnn_layer_size=2048 \
 --num_iter_print=100 \
---learning_rate=5e-4 \
+--learning_rate=5e-5 \
 --max_duration=27.0 \
 --min_duration=0.0 \
 --use_sortagrad=True \
@@ -20,10 +20,10 @@ python -u train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=True \
---train_manifest='data/librispeech/manifest.train' \
---dev_manifest='data/librispeech/manifest.dev' \
---mean_std_path='data/librispeech/mean_std.npz' \
---vocab_path='data/librispeech/eng_vocab.txt' \
+--train_manifest='data/tiny/manifest.train' \
+--dev_manifest='data/tiny/manifest.train' \
+--mean_std_path='data/tiny/mean_std.npz' \
+--vocab_path='data/tiny/vocab.txt' \
 --output_model_dir='./checkpoints' \
 --augment_conf_path='conf/augmentation.config' \
 --specgram_type='linear' \

@@ -7,7 +7,7 @@ import argparse
 import functools
 import paddle.v2 as paddle
 from data_utils.data import DataGenerator
-from models.model import DeepSpeech2Model
+from model_utils.model import DeepSpeech2Model
 from utils.error_rate import wer, cer
 from utils.utility import add_arguments, print_arguments
@@ -35,10 +35,10 @@ add_arg('mean_std_path', str,
         'data/librispeech/mean_std.npz',
         "Filepath of normalizer's mean & std.")
 add_arg('vocab_path', str,
-        'data/librispeech/eng_vocab.txt',
+        'data/librispeech/vocab.txt',
         "Filepath of vocabulary.")
 add_arg('lang_model_path', str,
-        'lm/data/common_crawl_00.prune01111.trie.klm',
+        'model_zoo/lm/common_crawl_00.prune01111.trie.klm',
         "Filepath for language model.")
 add_arg('model_path', str,
         './checkpoints/params.latest.tar.gz',

@@ -180,6 +180,8 @@ def ctc_beam_search_decoder(probs_seq,
             prob = prob * ext_scoring_func(result)
             log_prob = log(prob)
             beam_result.append((log_prob, result))
+        else:
+            beam_result.append((float('-inf'), ''))
     ## output top beam_size decoding results
     beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True)
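The added else branch gives any candidate that fails the preceding check (not visible in this hunk) a (float('-inf'), '') placeholder, so beam_result keeps one entry per candidate and the top-beam_size sort stays well-defined, with placeholders ranked last. A standalone illustration of that ordering (toy values, not decoder output):

from math import log

beam_result = [(log(0.4), 'a cat'), (float('-inf'), ''), (log(0.5), 'a cab')]
beam_result = sorted(beam_result, key=lambda pair: pair[0], reverse=True)
# -> [(log(0.5), 'a cab'), (log(0.4), 'a cat'), (-inf, '')]; placeholder sorts last.
print(beam_result)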

@@ -8,9 +8,10 @@ import os
 import time
 import gzip
 import paddle.v2 as paddle
-from lm.lm_scorer import LmScorer
-from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder
-from models.network import deep_speech_v2_network
+from model_utils.lm_scorer import LmScorer
+from model_utils.decoder import ctc_greedy_decoder, ctc_beam_search_decoder
+from model_utils.decoder import ctc_beam_search_decoder_batch
+from model_utils.network import deep_speech_v2_network
 class DeepSpeech2Model(object):

@@ -4,7 +4,7 @@ from __future__ import division
 from __future__ import print_function
 import unittest
-from models import decoder
+from model_utils import decoder
 class TestDecoders(unittest.TestCase):

@@ -14,6 +14,3 @@ if [ $MD5 != $md5_tmp ]; then
 echo "Fail to download the language model!"
 exit 1
 fi

@@ -7,7 +7,7 @@ import argparse
 import functools
 import paddle.v2 as paddle
 from data_utils.data import DataGenerator
-from models.model import DeepSpeech2Model
+from model_utils.model import DeepSpeech2Model
 from utils.error_rate import wer, cer
 from utils.utility import add_arguments, print_arguments
@@ -36,14 +36,14 @@ add_arg('mean_std_path', str,
         'data/librispeech/mean_std.npz',
         "Filepath of normalizer's mean & std.")
 add_arg('vocab_path', str,
-        'data/librispeech/eng_vocab.txt',
+        'data/librispeech/vocab.txt',
         "Filepath of vocabulary.")
 add_arg('model_path', str,
         './checkpoints/params.latest.tar.gz',
         "If None, the training starts from scratch, "
         "otherwise, it resumes from the pre-trained model.")
 add_arg('lang_model_path', str,
-        'lm/data/common_crawl_00.prune01111.trie.klm',
+        'model_zoo/lm/common_crawl_00.prune01111.trie.klm',
         "Filepath for language model.")
 add_arg('decoding_method', str,
         'ctc_beam_search',

@@ -21,10 +21,8 @@ add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
 add_arg('count_threshold', int, 0, "Truncation threshold for char counts.")
 add_arg('vocab_path', str,
-        None,
-        "Filepath to write the vocabulary.",
-        nargs='+',
-        required=True)
+        'data/librispeech/vocab.txt',
+        "Filepath to write the vocabulary.")
 add_arg('manifest_paths', str,
         None,
         "Filepaths of manifests for building vocabulary. "

@@ -9,7 +9,7 @@ import functools
 import paddle.v2 as paddle
 import _init_paths
 from data_utils.data import DataGenerator
-from models.model import DeepSpeech2Model
+from model_utils.model import DeepSpeech2Model
 from utils.error_rate import wer
 from utils.utility import add_arguments, print_arguments
@@ -41,10 +41,10 @@ add_arg('mean_std_path', str,
         'data/librispeech/mean_std.npz',
         "Filepath of normalizer's mean & std.")
 add_arg('vocab_path', str,
-        'data/librispeech/eng_vocab.txt',
+        'data/librispeech/vocab.txt',
         "Filepath of vocabulary.")
 add_arg('lang_model_path', str,
-        'lm/data/common_crawl_00.prune01111.trie.klm',
+        'model_zoo/lm/common_crawl_00.prune01111.trie.klm',
         "Filepath for language model.")
 add_arg('model_path', str,
         './checkpoints/params.latest.tar.gz',

@@ -6,7 +6,7 @@ from __future__ import print_function
 import argparse
 import functools
 import paddle.v2 as paddle
-from models.model import DeepSpeech2Model
+from model_utils.model import DeepSpeech2Model
 from data_utils.data import DataGenerator
 from utils.utility import add_arguments, print_arguments
@@ -41,7 +41,7 @@ add_arg('mean_std_path', str,
         'data/librispeech/mean_std.npz',
         "Filepath of normalizer's mean & std.")
 add_arg('vocab_path', str,
-        'data/librispeech/eng_vocab.txt',
+        'data/librispeech/vocab.txt',
         "Filepath of vocabulary.")
 add_arg('init_model_path', str,
         None,
