From d59b8ca97e36a41ecda84e2079fa0bad585230dc Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 22 May 2017 21:00:37 +0800 Subject: [PATCH 001/335] Add deep_speech_2 folder. --- README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..a0990367e --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +TBD From 3fc94427db7395a1b7f9ab1013ca32218830a101 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 25 May 2017 01:17:18 +0800 Subject: [PATCH 002/335] Add librispeech dataset, audio data provider and simplfied DeepSpeech2 model configuration. Bug exists when run training. --- README.md | 8 +- audio_data_utils.py | 159 +++++++++++++++++++++++++++++++++++++ eng_vocab.txt | 28 +++++++ librispeech.py | 97 +++++++++++++++++++++++ requirements.sh | 5 ++ train.py | 188 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 484 insertions(+), 1 deletion(-) create mode 100644 audio_data_utils.py create mode 100644 eng_vocab.txt create mode 100644 librispeech.py create mode 100644 requirements.sh create mode 100644 train.py diff --git a/README.md b/README.md index a0990367e..fcadf5686 100644 --- a/README.md +++ b/README.md @@ -1 +1,7 @@ -TBD +# Deep Speech 2 on PaddlePaddle + +``` +sh requirements.sh +python librispeech.py +python train.py +``` diff --git a/audio_data_utils.py b/audio_data_utils.py new file mode 100644 index 000000000..2f7bfcf7c --- /dev/null +++ b/audio_data_utils.py @@ -0,0 +1,159 @@ +import paddle.v2 as paddle +import logging +import json +import random +import soundfile +import numpy as np +import os + +# TODO: add z-score normalization. + +ENGLISH_CHAR_VOCAB_FILEPATH = "eng_vocab.txt" + +logger = logging.getLogger(__name__) + + +def spectrogram_from_file(filename, + stride_ms=10, + window_ms=20, + max_freq=None, + eps=1e-14): + """ + Calculate the log of linear spectrogram from FFT energy + Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech + """ + audio, sample_rate = soundfile.read(filename) + if audio.ndim >= 2: + audio = np.mean(audio, 1) + if max_freq is None: + max_freq = sample_rate / 2 + if max_freq > sample_rate / 2: + raise ValueError("max_freq must be greater than half of " + "sample rate.") + if stride_ms > window_ms: + raise ValueError("Stride size must not be greater than window size.") + stride_size = int(0.001 * sample_rate * stride_ms) + window_size = int(0.001 * sample_rate * window_ms) + spectrogram, freqs = extract_spectrogram( + audio, + window_size=window_size, + stride_size=stride_size, + sample_rate=sample_rate) + ind = np.where(freqs <= max_freq)[0][-1] + 1 + return np.log(spectrogram[:ind, :] + eps) + + +def extract_spectrogram(samples, window_size, stride_size, sample_rate): + """ + Compute the spectrogram for a real discrete signal. 
+ Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech + """ + # extract strided windows + truncate_size = (len(samples) - window_size) % stride_size + samples = samples[:len(samples) - truncate_size] + nshape = (window_size, (len(samples) - window_size) // stride_size + 1) + nstrides = (samples.strides[0], samples.strides[0] * stride_size) + windows = np.lib.stride_tricks.as_strided( + samples, shape=nshape, strides=nstrides) + assert np.all( + windows[:, 1] == samples[stride_size:(stride_size + window_size)]) + # window weighting, compute squared Fast Fourier Transform (fft), scaling + weighting = np.hanning(window_size)[:, None] + fft = np.fft.rfft(windows * weighting, axis=0) + fft = np.absolute(fft)**2 + scale = np.sum(weighting**2) * sample_rate + fft[1:-1, :] *= (2.0 / scale) + fft[(0, -1), :] /= scale + # prepare fft frequency list + freqs = float(sample_rate) / window_size * np.arange(fft.shape[0]) + return fft, freqs + + +def vocabulary_from_file(vocabulary_path): + """ + Load vocabulary from file. + """ + if os.path.exists(vocabulary_path): + vocab_lines = [] + with open(vocabulary_path, 'r') as file: + vocab_lines.extend(file.readlines()) + vocab_list = [line[:-1] for line in vocab_lines] + vocab_dict = dict( + [(token, id) for (id, token) in enumerate(vocab_list)]) + return vocab_dict, vocab_list + else: + raise ValueError("Vocabulary file %s not found.", vocabulary_path) + + +def get_vocabulary_size(): + vocab_dict, _ = vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) + return len(vocab_dict) + + +def parse_transcript(text, vocabulary): + """ + Convert the transcript text string to list of token index integers.. + """ + return [vocabulary[w] for w in text] + + +def reader_creator(manifest_path, + sort_by_duration=True, + shuffle=False, + max_duration=10.0, + min_duration=0.0): + if sort_by_duration and shuffle: + sort_by_duration = False + logger.warn("When shuffle set to true, " + "sort_by_duration is forced to set False.") + vocab_dict, _ = vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) + + def reader(): + # read manifest + manifest_data = [] + for json_line in open(manifest_path): + try: + json_data = json.loads(json_line) + except Exception as e: + raise ValueError("Error reading manifest: %s" % str(e)) + if (json_data["duration"] <= max_duration and + json_data["duration"] >= min_duration): + manifest_data.append(json_data) + # sort (by duration) or shuffle manifest + if sort_by_duration: + manifest_data.sort(key=lambda x: x["duration"]) + if shuffle: + random.shuffle(manifest_data) + # extract spectrogram feature + for instance in manifest_data: + spectrogram = spectrogram_from_file(instance["audio_filepath"]) + text = parse_transcript(instance["text"], vocab_dict) + yield (spectrogram, text) + + return reader + + +def padding_batch_reader(batch_reader, padding=[-1, -1], flatten=True): + def padding_batch(batch): + new_batch = [] + # get target shape within batch + nshape_list = [padding] + for audio, text in batch: + nshape_list.append(audio.shape) + target_shape = np.array(nshape_list).max(axis=0) + # padding + for audio, text in batch: + pad_shape = target_shape - audio.shape + assert np.all(pad_shape >= 0) + padded_audio = np.pad( + audio, [(0, pad_shape[0]), (0, pad_shape[1])], mode="constant") + if flatten: + padded_audio = padded_audio.flatten() + new_batch.append((padded_audio, text)) + return new_batch + + def new_batch_reader(): + for batch in batch_reader(): + yield padding_batch(batch) + + return new_batch_reader diff --git 
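A minimal sketch of the framing arithmetic behind spectrogram_from_file and extract_spectrogram above, assuming 16 kHz LibriSpeech audio and the default 20 ms window / 10 ms stride (plain arithmetic, not part of the patch). It also shows why the later train.py declares a spectrogram data layer of height 161 and width 1000 for clips of up to ten seconds.

```
sample_rate = 16000                                # LibriSpeech sample rate (assumed here)
stride_size = int(0.001 * sample_rate * 10)        # 160 samples per hop
window_size = int(0.001 * sample_rate * 20)        # 320 samples per window
num_bins = window_size // 2 + 1                    # np.fft.rfft output length -> 161 frequency bins
num_frames = (10 * sample_rate - window_size) // stride_size + 1   # frames in a 10-second clip
assert (num_bins, num_frames) == (161, 999)
```

With max_freq left at its default of half the sample rate, all 161 bins survive the frequency cut, so each ten-second clip becomes roughly a 161 x 1000 feature matrix before padding.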
a/eng_vocab.txt b/eng_vocab.txt new file mode 100644 index 000000000..8268f3f33 --- /dev/null +++ b/eng_vocab.txt @@ -0,0 +1,28 @@ +' + +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z diff --git a/librispeech.py b/librispeech.py new file mode 100644 index 000000000..fc7b9822b --- /dev/null +++ b/librispeech.py @@ -0,0 +1,97 @@ +import paddle.v2 as paddle +import os +import wget +import tarfile +import argparse +import soundfile +import json + +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') + +URL_TEST = "http://www.openslr.org/resources/12/test-clean.tar.gz" +URL_DEV = "http://www.openslr.org/resources/12/dev-clean.tar.gz" +URL_TRAIN = "http://www.openslr.org/resources/12/train-clean-100.tar.gz" + +parser = argparse.ArgumentParser( + description='Downloads and prepare LibriSpeech dataset.') +parser.add_argument( + "--target_dir", + default=DATA_HOME + "/Libri", + type=str, + help="Directory to save the dataset.") +parser.add_argument( + "--manifest", + default="./libri.manifest", + type=str, + help="Filepath prefix of output manifests.") +args = parser.parse_args() + + +def download(url, target_dir): + if not os.path.exists(target_dir): + os.makedirs(target_dir) + filepath = os.path.join(target_dir, url.split("/")[-1]) + if not os.path.exists(filepath): + print("Downloading %s ..." % url) + wget.download(url, target_dir) + print("") + return filepath + + +def unpack(filepath, target_dir): + print("Unpacking %s ..." % filepath) + tar = tarfile.open(filepath) + tar.extractall(target_dir) + tar.close() + return target_dir + + +def create_manifest(data_dir, manifest_path): + print("Creating manifest %s ..." % manifest_path) + json_lines = [] + for subfolder, _, filelist in os.walk(data_dir): + text_filelist = [ + filename for filename in filelist if filename.endswith('trans.txt') + ] + if len(text_filelist) > 0: + text_filepath = os.path.join(data_dir, subfolder, text_filelist[0]) + for line in open(text_filepath): + segments = line.strip().split() + text = ' '.join(segments[1:]).lower() + audio_filepath = os.path.join(data_dir, subfolder, + segments[0] + '.flac') + audio_data, samplerate = soundfile.read(audio_filepath) + duration = float(len(audio_data)) / samplerate + json_lines.append( + json.dumps({ + 'audio_filepath': audio_filepath, + 'duration': duration, + 'text': text + })) + with open(manifest_path, 'w') as out_file: + for line in json_lines: + out_file.write(line + '\n') + + +def prepare_dataset(url, target_dir, manifest_path): + filepath = download(url, target_dir) + unpacked_dir = unpack(filepath, target_dir) + create_manifest(unpacked_dir, manifest_path) + + +def main(): + prepare_dataset( + url=URL_TEST, + target_dir=os.path.join(args.target_dir), + manifest_path=args.manifest + ".test") + prepare_dataset( + url=URL_DEV, + target_dir=os.path.join(args.target_dir), + manifest_path=args.manifest + ".dev") + #prepare_dataset(url=URL_TRAIN, +#target_dir=os.path.join(args.target_dir), +#manifest_path=args.manifest + ".train") + + +if __name__ == '__main__': + main() diff --git a/requirements.sh b/requirements.sh new file mode 100644 index 000000000..7a0891699 --- /dev/null +++ b/requirements.sh @@ -0,0 +1,5 @@ +pip install wget +pip install soundfile + +# For Linux only +apt-get install libsndfile1 diff --git a/train.py b/train.py new file mode 100644 index 000000000..083a718d5 --- /dev/null +++ b/train.py @@ -0,0 +1,188 @@ +import paddle.v2 as paddle +import audio_data_utils +import argparse + +parser = argparse.ArgumentParser( + 
description='Simpled version of DeepSpeech2 trainer.') +parser.add_argument( + "--batch_size", default=512, type=int, help="Minibatch size.") +parser.add_argument("--trainer", default=1, type=int, help="Trainer number.") +parser.add_argument( + "--num_passes", default=20, type=int, help="Training pass number.") +args = parser.parse_args() + + +def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, + padding, act): + conv_layer = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=num_channels_in, + num_filters=num_channels_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=conv_layer, act=act) + + +def bidirectonal_simple_rnn_bn_layer(name, input, size, act): + def __simple_rnn_step__(input): + last_state = paddle.layer.memory(name=name + "_state", size=size) + input_fc = paddle.layer.fc( + input=input, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + input_fc_bn = paddle.layer.batch_norm( + input=input_fc, act=paddle.activation.Linear()) + state_fc = paddle.layer.fc( + input=last_state, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.addto( + name=name + "_state", input=[input_fc_bn, state_fc], act=act) + + forward = paddle.layer.recurrent_group( + step=__simple_rnn_step__, input=input) + return forward + # argument reverse is not exposed in V2 recurrent_group + #backward = paddle.layer.recurrent_group( + + +#step=__simple_rnn_step__, +#input=input, +#reverse=True) +#return paddle.layer.concat(input=[forward, backward]) + + +def conv_group(input): + conv1 = conv_bn_layer( + input=input, + filter_size=(11, 41), + num_channels_in=1, + num_channels_out=32, + stride=(3, 2), + padding=(5, 20), + act=paddle.activation.BRelu()) + conv2 = conv_bn_layer( + input=conv1, + filter_size=(11, 21), + num_channels_in=32, + num_channels_out=32, + stride=(1, 2), + padding=(5, 10), + act=paddle.activation.BRelu()) + conv3 = conv_bn_layer( + input=conv2, + filter_size=(11, 21), + num_channels_in=32, + num_channels_out=32, + stride=(1, 2), + padding=(5, 10), + act=paddle.activation.BRelu()) + return conv3 + + +def rnn_group(input, size, num_stacks): + output = input + for i in xrange(num_stacks): + output = bidirectonal_simple_rnn_bn_layer( + name=str(i), input=output, size=size, act=paddle.activation.BRelu()) + return output + + +def deep_speech2(audio_data, text_data, dict_size): + conv_group_output = conv_group(input=audio_data) + conv2seq = paddle.layer.block_expand( + input=conv_group_output, + num_channels=32, + stride_x=1, + stride_y=1, + block_x=1, + block_y=21) + rnn_group_output = rnn_group(input=conv2seq, size=256, num_stacks=5) + fc = paddle.layer.fc( + input=rnn_group_output, + size=dict_size + 1, + act=paddle.activation.Linear(), + bias_attr=True) + cost = paddle.layer.warp_ctc( + input=fc, + label=text_data, + size=dict_size + 1, + blank=dict_size, + norm_by_times=True) + return cost + + +def train(): + # create network config + dict_size = audio_data_utils.get_vocabulary_size() + audio_data = paddle.layer.data( + name="audio_spectrogram", + height=161, + width=1000, + type=paddle.data_type.dense_vector(161000)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(dict_size)) + cost = deep_speech2(audio_data, text_data, dict_size) + + # create parameters and optimizer + parameters = paddle.parameters.create(cost) + optimizer = paddle.optimizer.Adam( + 
learning_rate=5e-5, + gradient_clipping_threshold=5, + regularization=paddle.optimizer.L2Regularization(rate=8e-4)) + trainer = paddle.trainer.SGD( + cost=cost, parameters=parameters, update_equation=optimizer) + return + + # create data readers + feeding = { + "audio_spectrogram": 0, + "transcript_text": 1, + } + train_batch_reader = audio_data_utils.padding_batch_reader( + paddle.batch( + audio_data_utils.reader_creator("./libri.manifest.dev"), + batch_size=args.batch_size // args.trainer), + padding=[-1, 1000]) + test_batch_reader = audio_data_utils.padding_batch_reader( + paddle.batch( + audio_data_utils.reader_creator("./libri.manifest.test"), + batch_size=args.batch_size // args.trainer), + padding=[-1, 1000]) + + # create event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 10 == 0: + print "Pass: %d, Batch: %d, TrainCost: %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test(reader=test_batch_reader, feeding=feeding) + print "Pass: %d, TestCost: %f, %s" % (event.pass_id, event.cost, + result.metrics) + with gzip.open("params.tar.gz", 'w') as f: + parameters.to_tar(f) + + # run train + trainer.train( + reader=train_batch_reader, + event_handler=event_handler, + num_passes=10, + feeding=feeding) + + +def main(): + train() + + +if __name__ == '__main__': + main() From 70a343a4991c13120589e7419fa5c3c8551c190d Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 25 May 2017 16:10:23 +0800 Subject: [PATCH 003/335] Add infererence and add SortaGrad for only first pass. --- README.md | 2 + audio_data_utils.py | 4 ++ infer.py | 94 +++++++++++++++++++++++++++ librispeech.py | 2 +- model.py | 106 ++++++++++++++++++++++++++++++ requirements.sh | 2 +- train.py | 152 ++++++++++++-------------------------------- 7 files changed, 248 insertions(+), 114 deletions(-) create mode 100644 infer.py create mode 100644 model.py diff --git a/README.md b/README.md index fcadf5686..1f7e03847 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,5 @@ sh requirements.sh python librispeech.py python train.py ``` + +Please add warp-ctc library path (usually $PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib) to LD_LIBRARY_PATH. diff --git a/audio_data_utils.py b/audio_data_utils.py index 2f7bfcf7c..6dedfbf9f 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -90,6 +90,10 @@ def get_vocabulary_size(): return len(vocab_dict) +def get_vocabulary(): + return vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) + + def parse_transcript(text, vocabulary): """ Convert the transcript text string to list of token index integers.. 
diff --git a/infer.py b/infer.py new file mode 100644 index 000000000..7b16c8380 --- /dev/null +++ b/infer.py @@ -0,0 +1,94 @@ +import paddle.v2 as paddle +import audio_data_utils +import argparse +from model import deep_speech2 +import gzip +from itertools import groupby + +parser = argparse.ArgumentParser( + description='Simpled version of DeepSpeech2 inference.') +parser.add_argument( + "--num_samples", default=10, type=int, help="Number of inference samples.") +parser.add_argument( + "--num_conv_layers", default=2, type=int, help="Convolution layer number.") +parser.add_argument( + "--num_rnn_layers", default=3, type=int, help="RNN layer number.") +parser.add_argument( + "--rnn_layer_size", default=512, type=int, help="RNN layer cell number.") +parser.add_argument( + "--use_gpu", default=True, type=bool, help="Use gpu or not.") +args = parser.parse_args() + + +def remove_duplicate_and_blank(id_list, blank_id): + # remove consecutive duplicate tokens + id_list = [x[0] for x in groupby(id_list)] + # remove blank + return [id for id in id_list if id != blank_id] + + +def max_infer(): + # create network config + _, vocab_list = audio_data_utils.get_vocabulary() + dict_size = len(vocab_list) + audio_data = paddle.layer.data( + name="audio_spectrogram", + height=161, + width=1000, + type=paddle.data_type.dense_vector(161000)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(dict_size)) + _, max_id = deep_speech2( + audio_data=audio_data, + text_data=text_data, + dict_size=dict_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size) + + # load parameters + parameters = paddle.parameters.Parameters.from_tar( + gzip.open("params.tar.gz")) + + # prepare infer data + feeding = { + "audio_spectrogram": 0, + "transcript_text": 1, + } + test_batch_reader = audio_data_utils.padding_batch_reader( + paddle.batch( + audio_data_utils.reader_creator( + manifest_path="./libri.manifest.test", sort_by_duration=False), + batch_size=args.num_samples), + padding=[-1, 1000]) + infer_data = test_batch_reader().next() + + # run inference + max_id_results = paddle.infer( + output_layer=max_id, + parameters=parameters, + input=infer_data, + field=['id']) + + # postprocess + instance_length = len(max_id_results) / args.num_samples + instance_list = [ + max_id_results[i:i + instance_length] + for i in xrange(0, args.num_samples) + ] + for i, instance in enumerate(instance_list): + id_list = remove_duplicate_and_blank(instance, dict_size) + output_transcript = ''.join([vocab_list[id] for id in id_list]) + target_transcript = ''.join([vocab_list[id] for id in infer_data[i][1]]) + print("Target Transcript: %s \nOutput Transcript: %s \n" % + (target_transcript, output_transcript)) + + +def main(): + paddle.init(use_gpu=args.use_gpu, trainer_count=1) + max_infer() + + +if __name__ == '__main__': + main() diff --git a/librispeech.py b/librispeech.py index fc7b9822b..0d82e19f2 100644 --- a/librispeech.py +++ b/librispeech.py @@ -23,7 +23,7 @@ parser.add_argument( "--manifest", default="./libri.manifest", type=str, - help="Filepath prefix of output manifests.") + help="Filepath prefix for output manifests.") args = parser.parse_args() diff --git a/model.py b/model.py new file mode 100644 index 000000000..67bee5f7f --- /dev/null +++ b/model.py @@ -0,0 +1,106 @@ +import paddle.v2 as paddle + + +def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, + padding, act): + conv_layer = 
paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=num_channels_in, + num_filters=num_channels_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=conv_layer, act=act) + + +def bidirectonal_simple_rnn_bn_layer(name, input, size, act): + def __simple_rnn_step__(input): + last_state = paddle.layer.memory(name=name + "_state", size=size) + input_fc = paddle.layer.fc( + input=input, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + input_fc_bn = paddle.layer.batch_norm( + input=input_fc, act=paddle.activation.Linear()) + state_fc = paddle.layer.fc( + input=last_state, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.addto( + name=name + "_state", input=[input_fc_bn, state_fc], act=act) + + forward = paddle.layer.recurrent_group( + step=__simple_rnn_step__, input=input) + return forward + # argument reverse is not exposed in V2 recurrent_group + #backward = paddle.layer.recurrent_group( + + +#step=__simple_rnn_step__, +#input=input, +#reverse=True) +#return paddle.layer.concat(input=[forward, backward]) + + +def conv_group(input, num_stacks): + conv = conv_bn_layer( + input=input, + filter_size=(11, 41), + num_channels_in=1, + num_channels_out=32, + stride=(3, 2), + padding=(5, 20), + act=paddle.activation.BRelu()) + for i in xrange(num_stacks - 1): + conv = conv_bn_layer( + input=conv, + filter_size=(11, 21), + num_channels_in=32, + num_channels_out=32, + stride=(1, 2), + padding=(5, 10), + act=paddle.activation.BRelu()) + return conv + + +def rnn_group(input, size, num_stacks): + output = input + for i in xrange(num_stacks): + output = bidirectonal_simple_rnn_bn_layer( + name=str(i), input=output, size=size, act=paddle.activation.BRelu()) + return output + + +def deep_speech2(audio_data, + text_data, + dict_size, + num_conv_layers=2, + num_rnn_layers=3, + rnn_size=256): + conv_group_output = conv_group(input=audio_data, num_stacks=num_conv_layers) + conv2seq = paddle.layer.block_expand( + input=conv_group_output, + num_channels=32, + stride_x=1, + stride_y=1, + block_x=1, + block_y=21) + rnn_group_output = rnn_group( + input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) + fc = paddle.layer.fc( + input=rnn_group_output, + size=dict_size + 1, + act=paddle.activation.Linear(), + bias_attr=True) + cost = paddle.layer.warp_ctc( + input=fc, + label=text_data, + size=dict_size + 1, + blank=dict_size, + norm_by_times=True) + max_id = paddle.layer.max_id(input=fc) + return cost, max_id diff --git a/requirements.sh b/requirements.sh index 7a0891699..bb1f261de 100644 --- a/requirements.sh +++ b/requirements.sh @@ -1,5 +1,5 @@ pip install wget pip install soundfile -# For Linux only +# For Ubuntu only apt-get install libsndfile1 diff --git a/train.py b/train.py index 083a718d5..64be40333 100644 --- a/train.py +++ b/train.py @@ -1,6 +1,8 @@ import paddle.v2 as paddle import audio_data_utils import argparse +from model import deep_speech2 +import gzip parser = argparse.ArgumentParser( description='Simpled version of DeepSpeech2 trainer.') @@ -9,114 +11,19 @@ parser.add_argument( parser.add_argument("--trainer", default=1, type=int, help="Trainer number.") parser.add_argument( "--num_passes", default=20, type=int, help="Training pass number.") +parser.add_argument( + "--num_conv_layers", default=2, type=int, help="Convolution layer number.") +parser.add_argument( + "--num_rnn_layers", default=3, type=int, help="RNN layer number.") 
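The warp_ctc wiring in model.py above follows the usual CTC convention of reserving one extra output class for the blank symbol and placing it after all real tokens. A minimal sketch, assuming the 28-token eng_vocab.txt added earlier (apostrophe, space, a-z):

```
dict_size = 28                 # tokens in eng_vocab.txt
num_classes = dict_size + 1    # one extra class for the CTC blank
blank_id = dict_size           # blank sits after all real tokens, i.e. id 28
# warp_ctc is configured with size=num_classes and blank=blank_id, so the final
# fc layer must emit num_classes scores per time step.
assert (num_classes, blank_id) == (29, 28)
```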
+parser.add_argument( + "--rnn_layer_size", default=256, type=int, help="RNN layer cell number.") +parser.add_argument( + "--use_gpu", default=True, type=bool, help="Use gpu or not.") +parser.add_argument( + "--trainer_count", default=8, type=int, help="Trainer number.") args = parser.parse_args() -def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, - padding, act): - conv_layer = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=num_channels_in, - num_filters=num_channels_out, - stride=stride, - padding=padding, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.batch_norm(input=conv_layer, act=act) - - -def bidirectonal_simple_rnn_bn_layer(name, input, size, act): - def __simple_rnn_step__(input): - last_state = paddle.layer.memory(name=name + "_state", size=size) - input_fc = paddle.layer.fc( - input=input, - size=size, - act=paddle.activation.Linear(), - bias_attr=False) - input_fc_bn = paddle.layer.batch_norm( - input=input_fc, act=paddle.activation.Linear()) - state_fc = paddle.layer.fc( - input=last_state, - size=size, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.addto( - name=name + "_state", input=[input_fc_bn, state_fc], act=act) - - forward = paddle.layer.recurrent_group( - step=__simple_rnn_step__, input=input) - return forward - # argument reverse is not exposed in V2 recurrent_group - #backward = paddle.layer.recurrent_group( - - -#step=__simple_rnn_step__, -#input=input, -#reverse=True) -#return paddle.layer.concat(input=[forward, backward]) - - -def conv_group(input): - conv1 = conv_bn_layer( - input=input, - filter_size=(11, 41), - num_channels_in=1, - num_channels_out=32, - stride=(3, 2), - padding=(5, 20), - act=paddle.activation.BRelu()) - conv2 = conv_bn_layer( - input=conv1, - filter_size=(11, 21), - num_channels_in=32, - num_channels_out=32, - stride=(1, 2), - padding=(5, 10), - act=paddle.activation.BRelu()) - conv3 = conv_bn_layer( - input=conv2, - filter_size=(11, 21), - num_channels_in=32, - num_channels_out=32, - stride=(1, 2), - padding=(5, 10), - act=paddle.activation.BRelu()) - return conv3 - - -def rnn_group(input, size, num_stacks): - output = input - for i in xrange(num_stacks): - output = bidirectonal_simple_rnn_bn_layer( - name=str(i), input=output, size=size, act=paddle.activation.BRelu()) - return output - - -def deep_speech2(audio_data, text_data, dict_size): - conv_group_output = conv_group(input=audio_data) - conv2seq = paddle.layer.block_expand( - input=conv_group_output, - num_channels=32, - stride_x=1, - stride_y=1, - block_x=1, - block_y=21) - rnn_group_output = rnn_group(input=conv2seq, size=256, num_stacks=5) - fc = paddle.layer.fc( - input=rnn_group_output, - size=dict_size + 1, - act=paddle.activation.Linear(), - bias_attr=True) - cost = paddle.layer.warp_ctc( - input=fc, - label=text_data, - size=dict_size + 1, - blank=dict_size, - norm_by_times=True) - return cost - - def train(): # create network config dict_size = audio_data_utils.get_vocabulary_size() @@ -128,7 +35,13 @@ def train(): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(dict_size)) - cost = deep_speech2(audio_data, text_data, dict_size) + cost, _ = deep_speech2( + audio_data=audio_data, + text_data=text_data, + dict_size=dict_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size) # create parameters and optimizer parameters = 
paddle.parameters.create(cost) @@ -138,21 +51,30 @@ def train(): regularization=paddle.optimizer.L2Regularization(rate=8e-4)) trainer = paddle.trainer.SGD( cost=cost, parameters=parameters, update_equation=optimizer) - return # create data readers feeding = { "audio_spectrogram": 0, "transcript_text": 1, } - train_batch_reader = audio_data_utils.padding_batch_reader( + train_batch_reader_with_sortagrad = audio_data_utils.padding_batch_reader( paddle.batch( - audio_data_utils.reader_creator("./libri.manifest.dev"), + audio_data_utils.reader_creator( + manifest_path="./libri.manifest.dev", sort_by_duration=True), + batch_size=args.batch_size // args.trainer), + padding=[-1, 1000]) + train_batch_reader_without_sortagrad = audio_data_utils.padding_batch_reader( + paddle.batch( + audio_data_utils.reader_creator( + manifest_path="./libri.manifest.dev", + sort_by_duration=False, + shuffle=True), batch_size=args.batch_size // args.trainer), padding=[-1, 1000]) test_batch_reader = audio_data_utils.padding_batch_reader( paddle.batch( - audio_data_utils.reader_creator("./libri.manifest.test"), + audio_data_utils.reader_creator( + manifest_path="./libri.manifest.test", sort_by_duration=False), batch_size=args.batch_size // args.trainer), padding=[-1, 1000]) @@ -174,13 +96,19 @@ def train(): # run train trainer.train( - reader=train_batch_reader, + reader=train_batch_reader_with_sortagrad, + event_handler=event_handler, + num_passes=1, + feeding=feeding) + trainer.train( + reader=train_batch_reader_without_sortagrad, event_handler=event_handler, - num_passes=10, + num_passes=self.num_passes - 1, feeding=feeding) def main(): + paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) train() From 0babc5c4d73a3fab976a46d49e473c556e946f7a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 25 May 2017 23:36:06 +0800 Subject: [PATCH 004/335] Add function docs. --- audio_data_utils.py | 56 +++++++++++++++++++++++++++++++++++++++++++-- infer.py | 29 +++++++++++++++++------ librispeech.py | 15 +++++++++--- model.py | 49 +++++++++++++++++++++++++++++++++++++++ train.py | 23 +++++++++++++------ 5 files changed, 153 insertions(+), 19 deletions(-) diff --git a/audio_data_utils.py b/audio_data_utils.py index 6dedfbf9f..a3a397e94 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -1,3 +1,6 @@ +""" + Audio data preprocessing tools and reader creators. +""" import paddle.v2 as paddle import logging import json @@ -86,18 +89,24 @@ def vocabulary_from_file(vocabulary_path): def get_vocabulary_size(): + """ + Get vocabulary size. + """ vocab_dict, _ = vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) return len(vocab_dict) def get_vocabulary(): + """ + Get vocabulary. + """ return vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) def parse_transcript(text, vocabulary): """ - Convert the transcript text string to list of token index integers.. - """ + Convert the transcript text string to list of token index integers. + """ return [vocabulary[w] for w in text] @@ -106,6 +115,28 @@ def reader_creator(manifest_path, shuffle=False, max_duration=10.0, min_duration=0.0): + """ + Audio data reader creator. + + Instance: a tuple of a numpy ndarray of audio spectrogram and a list of + tokenized transcription text. + + :param manifest_path: Filepath for Manifest of audio clip files. + :type manifest_path: basestring + :param sort_by_duration: Sort the audio clips by duration if set True. + For SortaGrad. + :type sort_by_duration: bool + :param shuffle: Shuffle the audio clips if set True. 
+ :type shuffle: bool + :param max_duration: Audio clips with duration (in seconds) greater than + this will be discarded. + :type max_duration: float + :param min_duration: Audio clips with duration (in seconds) smaller than + this will be discarded. + :type min_duration: float + :return: Data reader function. + :rtype: callable + """ if sort_by_duration and shuffle: sort_by_duration = False logger.warn("When shuffle set to true, " @@ -138,6 +169,27 @@ def reader_creator(manifest_path, def padding_batch_reader(batch_reader, padding=[-1, -1], flatten=True): + """ + Padding for batches. Return a batch reader. + + Each instance in a batch will be padded to be of a same target shape. + The target shape is the largest shape among all the batch instances and + 'padding' argument. Therefore, if padding is set [-1, -1], instance will be + padded to have the same shape just within each batch and the shape will + be different across batches; if padding is set + [VERY_LARGE_NUM, VERY_LARGE_NUM], instances in all batches will be padded to + have the same shape of [VERY_LARGE_NUM, VERY_LARGE_NUM]. + + :param batch_reader: Input batch reader. + :type batch_reader: callable + :param padding: Padding pattern. Details please refer to the above. + :type padding: list + :param flatten: Flatten the tensor to be one dimension. + :type flatten: bool + :return: Batch reader function. + :rtype: callable + """ + def padding_batch(batch): new_batch = [] # get target shape within batch diff --git a/infer.py b/infer.py index 7b16c8380..1f13956e8 100644 --- a/infer.py +++ b/infer.py @@ -1,14 +1,21 @@ +""" + Inference for a simplifed version of Baidu DeepSpeech2 model. +""" + import paddle.v2 as paddle -import audio_data_utils +from itertools import groupby import argparse -from model import deep_speech2 import gzip -from itertools import groupby +import audio_data_utils +from model import deep_speech2 parser = argparse.ArgumentParser( - description='Simpled version of DeepSpeech2 inference.') + description='Simplified version of DeepSpeech2 inference.') parser.add_argument( - "--num_samples", default=10, type=int, help="Number of inference samples.") + "--num_samples", + default=10, + type=int, + help="Number of samples for inference.") parser.add_argument( "--num_conv_layers", default=2, type=int, help="Convolution layer number.") parser.add_argument( @@ -21,13 +28,21 @@ args = parser.parse_args() def remove_duplicate_and_blank(id_list, blank_id): + """ + Postprocessing for max-ctc-decoder. + - remove consecutive duplicate tokens. + - remove blanks. + """ # remove consecutive duplicate tokens id_list = [x[0] for x in groupby(id_list)] - # remove blank + # remove blanks return [id for id in id_list if id != blank_id] def max_infer(): + """ + Max-ctc-decoding for DeepSpeech2. + """ # create network config _, vocab_list = audio_data_utils.get_vocabulary() dict_size = len(vocab_list) @@ -64,7 +79,7 @@ def max_infer(): padding=[-1, 1000]) infer_data = test_batch_reader().next() - # run inference + # run max-ctc-decoding max_id_results = paddle.infer( output_layer=max_id, parameters=parameters, diff --git a/librispeech.py b/librispeech.py index 0d82e19f2..8f82a2885 100644 --- a/librispeech.py +++ b/librispeech.py @@ -1,3 +1,11 @@ +""" + Download, unpack and create manifest for Librespeech dataset. + + Manifest is a json file with each line containing one audio clip filepath, + its transcription text string, and its duration. It servers as a unified + interfance to organize different data sets. 
+""" + import paddle.v2 as paddle import os import wget @@ -88,9 +96,10 @@ def main(): url=URL_DEV, target_dir=os.path.join(args.target_dir), manifest_path=args.manifest + ".dev") - #prepare_dataset(url=URL_TRAIN, -#target_dir=os.path.join(args.target_dir), -#manifest_path=args.manifest + ".train") + prepare_dataset( + url=URL_TRAIN, + target_dir=os.path.join(args.target_dir), + manifest_path=args.manifest + ".train") if __name__ == '__main__': diff --git a/model.py b/model.py index 67bee5f7f..de6357f42 100644 --- a/model.py +++ b/model.py @@ -1,8 +1,17 @@ +""" + A simplifed version of Baidu DeepSpeech2 model. +""" + import paddle.v2 as paddle +#TODO: add bidirectional rnn. + def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, padding, act): + """ + Convolution layer with batch normalization. + """ conv_layer = paddle.layer.img_conv( input=input, filter_size=filter_size, @@ -16,6 +25,15 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, def bidirectonal_simple_rnn_bn_layer(name, input, size, act): + """ + Bidirectonal simple rnn layer with batch normalization. + The batch normalization is only performed on input-state projection + (sequence-wise normalization). + + Question: does mean and variance statistics computed over the whole sequence + or just on each individual time steps? + """ + def __simple_rnn_step__(input): last_state = paddle.layer.memory(name=name + "_state", size=size) input_fc = paddle.layer.fc( @@ -23,6 +41,7 @@ def bidirectonal_simple_rnn_bn_layer(name, input, size, act): size=size, act=paddle.activation.Linear(), bias_attr=False) + # batch norm is only performed on input-state projection input_fc_bn = paddle.layer.batch_norm( input=input_fc, act=paddle.activation.Linear()) state_fc = paddle.layer.fc( @@ -47,6 +66,9 @@ def bidirectonal_simple_rnn_bn_layer(name, input, size, act): def conv_group(input, num_stacks): + """ + Convolution group with several stacking convolution layers. + """ conv = conv_bn_layer( input=input, filter_size=(11, 41), @@ -68,6 +90,9 @@ def conv_group(input, num_stacks): def rnn_group(input, size, num_stacks): + """ + RNN group with several stacking RNN layers. + """ output = input for i in xrange(num_stacks): output = bidirectonal_simple_rnn_bn_layer( @@ -81,7 +106,27 @@ def deep_speech2(audio_data, num_conv_layers=2, num_rnn_layers=3, rnn_size=256): + """ + The whole DeepSpeech2 model structure (a simplified version). + + :param audio_data: Audio spectrogram data layer. + :type audio_data: LayerOutput + :param text_data: Transcription text data layer. + :type text_data: LayerOutput + :param dict_size: Dictionary size for tokenized transcription. + :type dict_size: int + :param num_conv_layers: Number of stacking convolution layers. + :type num_conv_layers: int + :param num_rnn_layers: Number of stacking RNN layers. + :type num_rnn_layers: int + :param rnn_size: RNN layer size (number of RNN cells). + :type rnn_size: int + :return: Tuple of the cost layer and the max_id decoder layer. 
+ :rtype: tuple of LayerOutput + """ + # convolution group conv_group_output = conv_group(input=audio_data, num_stacks=num_conv_layers) + # convert data form convolution feature map to sequence of vectors conv2seq = paddle.layer.block_expand( input=conv_group_output, num_channels=32, @@ -89,18 +134,22 @@ def deep_speech2(audio_data, stride_y=1, block_x=1, block_y=21) + # rnn group rnn_group_output = rnn_group( input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) + # output token distribution fc = paddle.layer.fc( input=rnn_group_output, size=dict_size + 1, act=paddle.activation.Linear(), bias_attr=True) + # ctc cost cost = paddle.layer.warp_ctc( input=fc, label=text_data, size=dict_size + 1, blank=dict_size, norm_by_times=True) + # max decoder max_id = paddle.layer.max_id(input=fc) return cost, max_id diff --git a/train.py b/train.py index 64be40333..d929297b2 100644 --- a/train.py +++ b/train.py @@ -1,20 +1,27 @@ +""" + Trainer for a simplifed version of Baidu DeepSpeech2 model. +""" + import paddle.v2 as paddle -import audio_data_utils import argparse -from model import deep_speech2 import gzip +import sys +from model import deep_speech2 +import audio_data_utils + +#TODO: add WER metric parser = argparse.ArgumentParser( - description='Simpled version of DeepSpeech2 trainer.') + description='Simplified version of DeepSpeech2 trainer.') parser.add_argument( "--batch_size", default=512, type=int, help="Minibatch size.") parser.add_argument("--trainer", default=1, type=int, help="Trainer number.") parser.add_argument( "--num_passes", default=20, type=int, help="Training pass number.") parser.add_argument( - "--num_conv_layers", default=2, type=int, help="Convolution layer number.") + "--num_conv_layers", default=3, type=int, help="Convolution layer number.") parser.add_argument( - "--num_rnn_layers", default=3, type=int, help="RNN layer number.") + "--num_rnn_layers", default=5, type=int, help="RNN layer number.") parser.add_argument( "--rnn_layer_size", default=256, type=int, help="RNN layer cell number.") parser.add_argument( @@ -25,6 +32,9 @@ args = parser.parse_args() def train(): + """ + DeepSpeech2 training. + """ # create network config dict_size = audio_data_utils.get_vocabulary_size() audio_data = paddle.layer.data( @@ -89,8 +99,7 @@ def train(): sys.stdout.flush() if isinstance(event, paddle.event.EndPass): result = trainer.test(reader=test_batch_reader, feeding=feeding) - print "Pass: %d, TestCost: %f, %s" % (event.pass_id, event.cost, - result.metrics) + print "Pass: %d, TestMetric: %s" % (event.pass_id, result.metrics) with gzip.open("params.tar.gz", 'w') as f: parameters.to_tar(f) From 9c3cd3c704dd079cf00c97d09d7f921c6f20344b Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 26 May 2017 17:47:24 +0800 Subject: [PATCH 005/335] Update some parameters and comments. 
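The max-ctc-decoding postprocessing documented in infer.py (collapse consecutive duplicate ids, then drop blanks) is easiest to see on a toy input. A minimal sketch with an invented 4-token vocabulary; the real model uses the 28-token vocabulary with blank id 28:

```
from itertools import groupby

def remove_duplicate_and_blank(id_list, blank_id):
    # same postprocessing as infer.py: collapse consecutive repeats, then drop blanks
    id_list = [x[0] for x in groupby(id_list)]
    return [token_id for token_id in id_list if token_id != blank_id]

# Toy walk-through with a made-up vocabulary and blank_id=4:
vocab = ['a', 'c', 't', ' ']
ctc_path = [1, 1, 1, 4, 0, 0, 4, 2]                 # frame-wise argmax ids: "ccc_aa_t"
decoded = remove_duplicate_and_blank(ctc_path, blank_id=4)
print(''.join(vocab[i] for i in decoded))           # -> "cat"
```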
--- train.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/train.py b/train.py index d929297b2..0d7dd8164 100644 --- a/train.py +++ b/train.py @@ -26,6 +26,8 @@ parser.add_argument( "--rnn_layer_size", default=256, type=int, help="RNN layer cell number.") parser.add_argument( "--use_gpu", default=True, type=bool, help="Use gpu or not.") +parser.add_argument( + "--use_sortagrad", default=False, type=bool, help="Use sortagrad or not.") parser.add_argument( "--trainer_count", default=8, type=int, help="Trainer number.") args = parser.parse_args() @@ -56,12 +58,9 @@ def train(): # create parameters and optimizer parameters = paddle.parameters.create(cost) optimizer = paddle.optimizer.Adam( - learning_rate=5e-5, - gradient_clipping_threshold=5, - regularization=paddle.optimizer.L2Regularization(rate=8e-4)) + learning_rate=5e-4, gradient_clipping_threshold=400) trainer = paddle.trainer.SGD( cost=cost, parameters=parameters, update_equation=optimizer) - # create data readers feeding = { "audio_spectrogram": 0, @@ -70,13 +69,13 @@ def train(): train_batch_reader_with_sortagrad = audio_data_utils.padding_batch_reader( paddle.batch( audio_data_utils.reader_creator( - manifest_path="./libri.manifest.dev", sort_by_duration=True), + manifest_path="./libri.manifest.train", sort_by_duration=True), batch_size=args.batch_size // args.trainer), padding=[-1, 1000]) train_batch_reader_without_sortagrad = audio_data_utils.padding_batch_reader( paddle.batch( audio_data_utils.reader_creator( - manifest_path="./libri.manifest.dev", + manifest_path="./libri.manifest.train", sort_by_duration=False, shuffle=True), batch_size=args.batch_size // args.trainer), @@ -84,7 +83,7 @@ def train(): test_batch_reader = audio_data_utils.padding_batch_reader( paddle.batch( audio_data_utils.reader_creator( - manifest_path="./libri.manifest.test", sort_by_duration=False), + manifest_path="./libri.manifest.dev", sort_by_duration=False), batch_size=args.batch_size // args.trainer), padding=[-1, 1000]) @@ -92,27 +91,31 @@ def train(): def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 10 == 0: - print "Pass: %d, Batch: %d, TrainCost: %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) + print "/nPass: %d, Batch: %d, TrainCost: %f" % ( + event.pass_id, event.batch_id, event.cost) else: sys.stdout.write('.') sys.stdout.flush() if isinstance(event, paddle.event.EndPass): result = trainer.test(reader=test_batch_reader, feeding=feeding) - print "Pass: %d, TestMetric: %s" % (event.pass_id, result.metrics) + print "Pass: %d, TestCost: %s" % (event.pass_id, result.cost) with gzip.open("params.tar.gz", 'w') as f: parameters.to_tar(f) # run train - trainer.train( - reader=train_batch_reader_with_sortagrad, - event_handler=event_handler, - num_passes=1, - feeding=feeding) + # first pass with sortagrad + if args.use_sortagrad: + trainer.train( + reader=train_batch_reader_with_sortagrad, + event_handler=event_handler, + num_passes=1, + feeding=feeding) + args.num_passes -= 1 + # other passes without sortagrad trainer.train( reader=train_batch_reader_without_sortagrad, event_handler=event_handler, - num_passes=self.num_passes - 1, + num_passes=args.num_passes, feeding=feeding) From e6a349992bd894663d3e9bed107a8543d478b735 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 30 May 2017 20:34:03 +0800 Subject: [PATCH 006/335] Refactor data utils into a class and add feature normalization. 
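For reference, each manifest line written by create_manifest() in librispeech.py is a single JSON object with audio_filepath, duration and text fields. A minimal sketch of one entry; the filepath, duration and transcript below are invented placeholders, not real dataset contents:

```
import json

entry = {
    'audio_filepath': '/root/.cache/paddle/dataset/speech/Libri/LibriSpeech/dev-clean/123/456/123-456-0000.flac',
    'duration': 6.12,
    'text': 'some lower cased transcription words'
}
line = json.dumps(entry)
# Readers parse each line back with json.loads() and keep only entries whose
# duration lies within [min_duration, max_duration] before extracting features.
parsed = json.loads(line)
assert 0.0 <= parsed['duration'] <= 20.0
```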
--- audio_data_utils.py | 512 +++++++++++++++++++++++++++++--------------- train.py | 85 ++++---- 2 files changed, 389 insertions(+), 208 deletions(-) diff --git a/audio_data_utils.py b/audio_data_utils.py index a3a397e94..7d09d612a 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -1,5 +1,6 @@ """ - Audio data preprocessing tools and reader creators. + Providing basic audio data preprocessing pipeline, and offering + both instance-level and batch-level data reader interfaces. """ import paddle.v2 as paddle import logging @@ -9,143 +10,201 @@ import soundfile import numpy as np import os -# TODO: add z-score normalization. - -ENGLISH_CHAR_VOCAB_FILEPATH = "eng_vocab.txt" - +RANDOM_SEED = 0 logger = logging.getLogger(__name__) -def spectrogram_from_file(filename, - stride_ms=10, - window_ms=20, - max_freq=None, - eps=1e-14): - """ - Calculate the log of linear spectrogram from FFT energy - Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech - """ - audio, sample_rate = soundfile.read(filename) - if audio.ndim >= 2: - audio = np.mean(audio, 1) - if max_freq is None: - max_freq = sample_rate / 2 - if max_freq > sample_rate / 2: - raise ValueError("max_freq must be greater than half of " - "sample rate.") - if stride_ms > window_ms: - raise ValueError("Stride size must not be greater than window size.") - stride_size = int(0.001 * sample_rate * stride_ms) - window_size = int(0.001 * sample_rate * window_ms) - spectrogram, freqs = extract_spectrogram( - audio, - window_size=window_size, - stride_size=stride_size, - sample_rate=sample_rate) - ind = np.where(freqs <= max_freq)[0][-1] + 1 - return np.log(spectrogram[:ind, :] + eps) - - -def extract_spectrogram(samples, window_size, stride_size, sample_rate): +class DataGenerator(object): """ - Compute the spectrogram for a real discrete signal. - Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech - """ - # extract strided windows - truncate_size = (len(samples) - window_size) % stride_size - samples = samples[:len(samples) - truncate_size] - nshape = (window_size, (len(samples) - window_size) // stride_size + 1) - nstrides = (samples.strides[0], samples.strides[0] * stride_size) - windows = np.lib.stride_tricks.as_strided( - samples, shape=nshape, strides=nstrides) - assert np.all( - windows[:, 1] == samples[stride_size:(stride_size + window_size)]) - # window weighting, compute squared Fast Fourier Transform (fft), scaling - weighting = np.hanning(window_size)[:, None] - fft = np.fft.rfft(windows * weighting, axis=0) - fft = np.absolute(fft)**2 - scale = np.sum(weighting**2) * sample_rate - fft[1:-1, :] *= (2.0 / scale) - fft[(0, -1), :] /= scale - # prepare fft frequency list - freqs = float(sample_rate) / window_size * np.arange(fft.shape[0]) - return fft, freqs - - -def vocabulary_from_file(vocabulary_path): - """ - Load vocabulary from file. + DataGenerator provides basic audio data preprocessing pipeline, and offer + both instance-level and batch-level data reader interfaces. + Normalized FFT are used as audio features here. + + :param vocab_filepath: Vocabulary file path for indexing tokenized + transcriptions. + :type vocab_filepath: basestring + :param normalizer_manifest_path: Manifest filepath for collecting feature + normalization statistics, e.g. mean, std. + :type normalizer_manifest_path: basestring + :param normalizer_num_samples: Number of instances sampled for collecting + feature normalization statistics. + Default is 100. 
+ :type normalizer_num_samples: int + :param max_duration: Audio clips with duration (in seconds) greater than + this will be discarded. Default is 20.0. + :type max_duration: float + :param min_duration: Audio clips with duration (in seconds) smaller than + this will be discarded. Default is 0.0. + :type min_duration: float + :param stride_ms: Striding size (in milliseconds) for generating frames. + Default is 10.0. + :type stride_ms: float + :param window_ms: Window size (in milliseconds) for frames. Default is 20.0. + :type window_ms: float + :param max_frequency: Maximun frequency for FFT features. FFT features of + frequency larger than this will be discarded. + If set None, all features will be kept. + Default is None. + :type max_frequency: float """ - if os.path.exists(vocabulary_path): - vocab_lines = [] - with open(vocabulary_path, 'r') as file: - vocab_lines.extend(file.readlines()) - vocab_list = [line[:-1] for line in vocab_lines] - vocab_dict = dict( - [(token, id) for (id, token) in enumerate(vocab_list)]) - return vocab_dict, vocab_list - else: - raise ValueError("Vocabulary file %s not found.", vocabulary_path) + def __init__(self, + vocab_filepath, + normalizer_manifest_path, + normalizer_num_samples=100, + max_duration=20.0, + min_duration=0.0, + stride_ms=10.0, + window_ms=20.0, + max_frequency=None): + self.__max_duration__ = max_duration + self.__min_duration__ = min_duration + self.__stride_ms__ = stride_ms + self.__window_ms__ = window_ms + self.__max_frequency__ = max_frequency + self.__random__ = random.Random(RANDOM_SEED) + # load vocabulary (dictionary) + self.__vocab_dict__, self.__vocab_list__ = \ + self.__load_vocabulary_from_file__(vocab_filepath) + # collect normalizer statistics + self.__mean__, self.__std__ = self.__collect_normalizer_statistics__( + manifest_path=normalizer_manifest_path, + num_samples=normalizer_num_samples) -def get_vocabulary_size(): - """ - Get vocabulary size. - """ - vocab_dict, _ = vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) - return len(vocab_dict) + def __audio_featurize__(self, audio_filename): + """ + Preprocess audio data, including feature extraction, normalization etc.. + """ + features = self.__audio_basic_featurize__(audio_filename) + return self.__normalize__(features) + def __text_featurize__(self, text): + """ + Preprocess text data, including tokenizing and token indexing etc.. + """ + return self.__convert_text_to_char_index__( + text=text, vocabulary=self.__vocab_dict__) -def get_vocabulary(): - """ - Get vocabulary. - """ - return vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) + def __audio_basic_featurize__(self, audio_filename): + """ + Compute basic (without normalization etc.) features for audio data. + """ + return self.__spectrogram_from_file__( + filename=audio_filename, + stride_ms=self.__stride_ms__, + window_ms=self.__window_ms__, + max_freq=self.__max_frequency__) + def __collect_normalizer_statistics__(self, manifest_path, num_samples=100): + """ + Compute feature normalization statistics, i.e. mean and stddev. 
+ """ + # read manifest + manifest = self.__read_manifest__( + manifest_path=manifest_path, + max_duration=self.__max_duration__, + min_duration=self.__min_duration__) + # sample for statistics + sampled_manifest = self.__random__.sample(manifest, num_samples) + # extract spectrogram feature + features = [] + for instance in sampled_manifest: + spectrogram = self.__audio_basic_featurize__( + instance["audio_filepath"]) + features.append(spectrogram) + features = np.hstack(features) + mean = np.mean(features, axis=1).reshape([-1, 1]) + std = np.std(features, axis=1).reshape([-1, 1]) + return mean, std -def parse_transcript(text, vocabulary): - """ - Convert the transcript text string to list of token index integers. - """ - return [vocabulary[w] for w in text] + def __normalize__(self, features, eps=1e-14): + """ + Normalize features to be of zero mean and unit stddev. + """ + return (features - self.__mean__) / (self.__std__ + eps) + def __spectrogram_from_file__(self, + filename, + stride_ms=10.0, + window_ms=20.0, + max_freq=None, + eps=1e-14): + """ + Laod audio data and calculate the log of spectrogram by FFT. + Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech + """ + audio, sample_rate = soundfile.read(filename) + if audio.ndim >= 2: + audio = np.mean(audio, 1) + if max_freq is None: + max_freq = sample_rate / 2 + if max_freq > sample_rate / 2: + raise ValueError("max_freq must be greater than half of " + "sample rate.") + if stride_ms > window_ms: + raise ValueError("Stride size must not be greater than " + "window size.") + stride_size = int(0.001 * sample_rate * stride_ms) + window_size = int(0.001 * sample_rate * window_ms) + spectrogram, freqs = self.__extract_spectrogram__( + audio, + window_size=window_size, + stride_size=stride_size, + sample_rate=sample_rate) + ind = np.where(freqs <= max_freq)[0][-1] + 1 + return np.log(spectrogram[:ind, :] + eps) -def reader_creator(manifest_path, - sort_by_duration=True, - shuffle=False, - max_duration=10.0, - min_duration=0.0): - """ - Audio data reader creator. - - Instance: a tuple of a numpy ndarray of audio spectrogram and a list of - tokenized transcription text. - - :param manifest_path: Filepath for Manifest of audio clip files. - :type manifest_path: basestring - :param sort_by_duration: Sort the audio clips by duration if set True. - For SortaGrad. - :type sort_by_duration: bool - :param shuffle: Shuffle the audio clips if set True. - :type shuffle: bool - :param max_duration: Audio clips with duration (in seconds) greater than - this will be discarded. - :type max_duration: float - :param min_duration: Audio clips with duration (in seconds) smaller than - this will be discarded. - :type min_duration: float - :return: Data reader function. - :rtype: callable - """ - if sort_by_duration and shuffle: - sort_by_duration = False - logger.warn("When shuffle set to true, " - "sort_by_duration is forced to set False.") - vocab_dict, _ = vocabulary_from_file(ENGLISH_CHAR_VOCAB_FILEPATH) + def __extract_spectrogram__(self, samples, window_size, stride_size, + sample_rate): + """ + Compute the spectrogram by FFT for a discrete real signal. 
+ Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech + """ + # extract strided windows + truncate_size = (len(samples) - window_size) % stride_size + samples = samples[:len(samples) - truncate_size] + nshape = (window_size, (len(samples) - window_size) // stride_size + 1) + nstrides = (samples.strides[0], samples.strides[0] * stride_size) + windows = np.lib.stride_tricks.as_strided( + samples, shape=nshape, strides=nstrides) + assert np.all( + windows[:, 1] == samples[stride_size:(stride_size + window_size)]) + # window weighting, squared Fast Fourier Transform (fft), scaling + weighting = np.hanning(window_size)[:, None] + fft = np.fft.rfft(windows * weighting, axis=0) + fft = np.absolute(fft)**2 + scale = np.sum(weighting**2) * sample_rate + fft[1:-1, :] *= (2.0 / scale) + fft[(0, -1), :] /= scale + # prepare fft frequency list + freqs = float(sample_rate) / window_size * np.arange(fft.shape[0]) + return fft, freqs - def reader(): - # read manifest - manifest_data = [] + def __load_vocabulary_from_file__(self, vocabulary_path): + """ + Load vocabulary from file. + """ + if not os.path.exists(vocabulary_path): + raise ValueError("Vocabulary file %s not found.", vocabulary_path) + vocab_lines = [] + with open(vocabulary_path, 'r') as file: + vocab_lines.extend(file.readlines()) + vocab_list = [line[:-1] for line in vocab_lines] + vocab_dict = dict( + [(token, id) for (id, token) in enumerate(vocab_list)]) + return vocab_dict, vocab_list + + def __convert_text_to_char_index__(self, text, vocabulary): + """ + Convert text string to a list of character index integers. + """ + return [vocabulary[w] for w in text] + + def __read_manifest__(self, manifest_path, max_duration, min_duration): + """ + Load and parse manifest file. + """ + manifest = [] for json_line in open(manifest_path): try: json_data = json.loads(json_line) @@ -153,63 +212,172 @@ def reader_creator(manifest_path, raise ValueError("Error reading manifest: %s" % str(e)) if (json_data["duration"] <= max_duration and json_data["duration"] >= min_duration): - manifest_data.append(json_data) - # sort (by duration) or shuffle manifest - if sort_by_duration: - manifest_data.sort(key=lambda x: x["duration"]) - if shuffle: - random.shuffle(manifest_data) - # extract spectrogram feature - for instance in manifest_data: - spectrogram = spectrogram_from_file(instance["audio_filepath"]) - text = parse_transcript(instance["text"], vocab_dict) - yield (spectrogram, text) + manifest.append(json_data) + return manifest - return reader + def __padding_batch__(self, batch, padding_to=-1, flatten=False): + """ + Padding audio part of features (only in the time axis -- column axis) + with zeros, to make each instance in the batch share the same + audio feature shape. + If `padding_to` is set -1, the maximun column numbers in the batch will + be used as the target size. Otherwise, `padding_to` will be the target + size. Default is -1. -def padding_batch_reader(batch_reader, padding=[-1, -1], flatten=True): - """ - Padding for batches. Return a batch reader. - - Each instance in a batch will be padded to be of a same target shape. - The target shape is the largest shape among all the batch instances and - 'padding' argument. 
Therefore, if padding is set [-1, -1], instance will be - padded to have the same shape just within each batch and the shape will - be different across batches; if padding is set - [VERY_LARGE_NUM, VERY_LARGE_NUM], instances in all batches will be padded to - have the same shape of [VERY_LARGE_NUM, VERY_LARGE_NUM]. - - :param batch_reader: Input batch reader. - :type batch_reader: callable - :param padding: Padding pattern. Details please refer to the above. - :type padding: list - :param flatten: Flatten the tensor to be one dimension. - :type flatten: bool - :return: Batch reader function. - :rtype: callable - """ - - def padding_batch(batch): + If `flatten` is set True, audio data will be flatten to be a 1-dim + ndarray. Default is False. + """ new_batch = [] - # get target shape within batch - nshape_list = [padding] - for audio, text in batch: - nshape_list.append(audio.shape) - target_shape = np.array(nshape_list).max(axis=0) + # get target shape + max_length = max([audio.shape[1] for audio, text in batch]) + if padding_to != -1: + if padding_to < max_length: + raise ValueError("If padding_to is not -1, it should be greater" + " or equal to the original instance length.") + max_length = padding_to # padding for audio, text in batch: - pad_shape = target_shape - audio.shape - assert np.all(pad_shape >= 0) - padded_audio = np.pad( - audio, [(0, pad_shape[0]), (0, pad_shape[1])], mode="constant") + padded_audio = np.zeros([audio.shape[0], max_length]) + padded_audio[:, :audio.shape[1]] = audio if flatten: padded_audio = padded_audio.flatten() new_batch.append((padded_audio, text)) return new_batch - def new_batch_reader(): - for batch in batch_reader(): - yield padding_batch(batch) + def instance_reader_creator(self, + manifest_path, + sort_by_duration=True, + shuffle=False): + """ + Instance reader creator for audio data. Creat a callable function to + produce instances of data. + + Instance: a tuple of a numpy ndarray of audio spectrogram and a list of + tokenized and indexed transcription text. + + :param manifest_path: Filepath of manifest for audio clip files. + :type manifest_path: basestring + :param sort_by_duration: Sort the audio clips by duration if set True + (for SortaGrad). + :type sort_by_duration: bool + :param shuffle: Shuffle the audio clips if set True. + :type shuffle: bool + :return: Data reader function. + :rtype: callable + """ + if sort_by_duration and shuffle: + sort_by_duration = False + logger.warn("When shuffle set to true, " + "sort_by_duration is forced to set False.") + + def reader(): + # read manifest + manifest = self.__read_manifest__( + manifest_path=manifest_path, + max_duration=self.__max_duration__, + min_duration=self.__min_duration__) + # sort (by duration) or shuffle manifest + if sort_by_duration: + manifest.sort(key=lambda x: x["duration"]) + if shuffle: + self.__random__.shuffle(manifest) + # extract spectrogram feature + for instance in manifest: + spectrogram = self.__audio_featurize__( + instance["audio_filepath"]) + transcript = self.__text_featurize__(instance["text"]) + yield (spectrogram, transcript) + + return reader + + def batch_reader_creator(self, + manifest_path, + batch_size, + padding_to=-1, + flatten=False, + sort_by_duration=True, + shuffle=False): + """ + Batch data reader creator for audio data. Creat a callable function to + produce batches of data. + + Audio features will be padded with zeros to make each instance in the + batch to share the same audio feature shape. 
+ + :param manifest_path: Filepath of manifest for audio clip files. + :type manifest_path: basestring + :param batch_size: Instance number in a batch. + :type batch_size: int + :param padding_to: If set -1, the maximun column numbers in the batch + will be used as the target size for padding. + Otherwise, `padding_to` will be the target size. + Default is -1. + :type padding_to: int + :param flatten: If set True, audio data will be flatten to be a 1-dim + ndarray. Otherwise, 2-dim ndarray. Default is False. + :type flatten: bool + :param sort_by_duration: Sort the audio clips by duration if set True + (for SortaGrad). + :type sort_by_duration: bool + :param shuffle: Shuffle the audio clips if set True. + :type shuffle: bool + :return: Batch reader function, producing batches of data when called. + :rtype: callable + """ + + def batch_reader(): + instance_reader = self.instance_reader_creator( + manifest_path=manifest_path, + sort_by_duration=sort_by_duration, + shuffle=shuffle) + batch = [] + for instance in instance_reader(): + batch.append(instance) + if len(batch) == batch_size: + yield self.__padding_batch__(batch, padding_to, flatten) + batch = [] + if len(batch) > 0: + yield self.__padding_batch__(batch, padding_to, flatten) + + return batch_reader + + def vocabulary_size(self): + """ + Get vocabulary size. + + :return: Vocabulary size. + :rtype: int + """ + return len(self.__vocab_list__) + + def vocabulary_dict(self): + """ + Get vocabulary in dict. + + :return: Vocabulary in dict. + :rtype: dict + """ + return self.__vocab_dict__ + + def vocabulary_list(self): + """ + Get vocabulary in list. + + :return: Vocabulary in list + :rtype: list + """ + return self.__vocab_list__ + + def data_name_feeding(self): + """ + Get feeddings (data field name and corresponding field id). - return new_batch_reader + :return: Feeding dict. + :rtype: dict + """ + feeding = { + "audio_spectrogram": 0, + "transcript_text": 1, + } + return feeding diff --git a/train.py b/train.py index 0d7dd8164..89dcf35c9 100644 --- a/train.py +++ b/train.py @@ -5,16 +5,18 @@ import paddle.v2 as paddle import argparse import gzip +import time import sys from model import deep_speech2 -import audio_data_utils +from audio_data_utils import DataGenerator +import numpy as np #TODO: add WER metric parser = argparse.ArgumentParser( description='Simplified version of DeepSpeech2 trainer.') parser.add_argument( - "--batch_size", default=512, type=int, help="Minibatch size.") + "--batch_size", default=32, type=int, help="Minibatch size.") parser.add_argument("--trainer", default=1, type=int, help="Trainer number.") parser.add_argument( "--num_passes", default=20, type=int, help="Training pass number.") @@ -23,7 +25,7 @@ parser.add_argument( parser.add_argument( "--num_rnn_layers", default=5, type=int, help="RNN layer number.") parser.add_argument( - "--rnn_layer_size", default=256, type=int, help="RNN layer cell number.") + "--rnn_layer_size", default=512, type=int, help="RNN layer cell number.") parser.add_argument( "--use_gpu", default=True, type=bool, help="Use gpu or not.") parser.add_argument( @@ -37,13 +39,45 @@ def train(): """ DeepSpeech2 training. 
""" + # create data readers + data_generator = DataGenerator( + vocab_filepath='eng_vocab.txt', + normalizer_manifest_path='./libri.manifest.train', + normalizer_num_samples=200, + max_duration=20.0, + min_duration=0.0, + stride_ms=10, + window_ms=20) + train_batch_reader_sortagrad = data_generator.batch_reader_creator( + manifest_path='./libri.manifest.dev.small', + batch_size=args.batch_size // args.trainer, + padding_to=2000, + flatten=True, + sort_by_duration=True, + shuffle=False) + train_batch_reader_nosortagrad = data_generator.batch_reader_creator( + manifest_path='./libri.manifest.dev.small', + batch_size=args.batch_size // args.trainer, + padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=True) + test_batch_reader = data_generator.batch_reader_creator( + manifest_path='./libri.manifest.test', + batch_size=args.batch_size // args.trainer, + padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=False) + feeding = data_generator.data_name_feeding() + # create network config - dict_size = audio_data_utils.get_vocabulary_size() + dict_size = data_generator.vocabulary_size() audio_data = paddle.layer.data( name="audio_spectrogram", height=161, - width=1000, - type=paddle.data_type.dense_vector(161000)) + width=2000, + type=paddle.data_type.dense_vector(322000)) text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(dict_size)) @@ -58,47 +92,26 @@ def train(): # create parameters and optimizer parameters = paddle.parameters.create(cost) optimizer = paddle.optimizer.Adam( - learning_rate=5e-4, gradient_clipping_threshold=400) + learning_rate=5e-5, gradient_clipping_threshold=400) trainer = paddle.trainer.SGD( cost=cost, parameters=parameters, update_equation=optimizer) - # create data readers - feeding = { - "audio_spectrogram": 0, - "transcript_text": 1, - } - train_batch_reader_with_sortagrad = audio_data_utils.padding_batch_reader( - paddle.batch( - audio_data_utils.reader_creator( - manifest_path="./libri.manifest.train", sort_by_duration=True), - batch_size=args.batch_size // args.trainer), - padding=[-1, 1000]) - train_batch_reader_without_sortagrad = audio_data_utils.padding_batch_reader( - paddle.batch( - audio_data_utils.reader_creator( - manifest_path="./libri.manifest.train", - sort_by_duration=False, - shuffle=True), - batch_size=args.batch_size // args.trainer), - padding=[-1, 1000]) - test_batch_reader = audio_data_utils.padding_batch_reader( - paddle.batch( - audio_data_utils.reader_creator( - manifest_path="./libri.manifest.dev", sort_by_duration=False), - batch_size=args.batch_size // args.trainer), - padding=[-1, 1000]) # create event handler def event_handler(event): + global start_time if isinstance(event, paddle.event.EndIteration): if event.batch_id % 10 == 0: - print "/nPass: %d, Batch: %d, TrainCost: %f" % ( + print "\nPass: %d, Batch: %d, TrainCost: %f" % ( event.pass_id, event.batch_id, event.cost) else: sys.stdout.write('.') sys.stdout.flush() + if isinstance(event, paddle.event.BeginPass): + start_time = time.time() if isinstance(event, paddle.event.EndPass): result = trainer.test(reader=test_batch_reader, feeding=feeding) - print "Pass: %d, TestCost: %s" % (event.pass_id, result.cost) + print "\n------- Time: %d, Pass: %d, TestCost: %s" % ( + time.time() - start_time, event.pass_id, result.cost) with gzip.open("params.tar.gz", 'w') as f: parameters.to_tar(f) @@ -106,14 +119,14 @@ def train(): # first pass with sortagrad if args.use_sortagrad: trainer.train( - 
reader=train_batch_reader_with_sortagrad, + reader=train_batch_reader_sortagrad, event_handler=event_handler, num_passes=1, feeding=feeding) args.num_passes -= 1 # other passes without sortagrad trainer.train( - reader=train_batch_reader_without_sortagrad, + reader=train_batch_reader_nosortagrad, event_handler=event_handler, num_passes=args.num_passes, feeding=feeding) From 8313895e858ad7da2f45e373446dd0c11e923431 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 2 Jun 2017 21:07:58 +0800 Subject: [PATCH 007/335] 1. Fix incorrect decoder result printing. 2. Fix incorrect batch-norm usage in RNN. 3. Fix overlapping train/dev/test manfests. 4. Update README.md and requirements.txt. 5. Expose more arguments to users in argparser. 6. Update all other details. --- README.md | 55 +++++++++++++++++-- audio_data_utils.py | 2 +- infer.py | 83 ++++++++++++++++++++--------- librispeech.py | 80 +++++++++++++++++++--------- model.py | 65 ++++++++--------------- requirements.sh | 5 -- requirements.txt | 2 + train.py | 125 ++++++++++++++++++++++++++++++-------------- 8 files changed, 280 insertions(+), 137 deletions(-) delete mode 100644 requirements.sh create mode 100644 requirements.txt diff --git a/README.md b/README.md index 1f7e03847..48ee9f9a1 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,58 @@ # Deep Speech 2 on PaddlePaddle +## Quick Start + +### Installation + +Please replace `$PADDLE_INSTALL_DIR` with your paddle installation directory. + +``` +pip install -r requirements.txt +export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib:$LD_LIBRARY_PATH +``` + +For some machines, we also need to install libsndfile1. Details to be added. + +### Preparing Dataset(s) + ``` -sh requirements.sh python librispeech.py -python train.py ``` -Please add warp-ctc library path (usually $PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib) to LD_LIBRARY_PATH. +More help for arguments: + +``` +python librispeech.py --help +``` + +### Traininig + +For GPU Training: + +``` +CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --trainer_count 4 +``` + +For CPU Training: + +``` +python train.py --trainer_count 8 --use_gpu False +``` + +More help for arguments: + +``` +python train.py --help +``` + +### Inferencing + +``` +python infer.py +``` + +More help for arguments: + +``` +python infer.py --help +``` diff --git a/audio_data_utils.py b/audio_data_utils.py index 7d09d612a..c717bcf18 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) class DataGenerator(object): """ - DataGenerator provides basic audio data preprocessing pipeline, and offer + DataGenerator provides basic audio data preprocessing pipeline, and offers both instance-level and batch-level data reader interfaces. Normalized FFT are used as audio features here. diff --git a/infer.py b/infer.py index 1f13956e8..1c52c98fd 100644 --- a/infer.py +++ b/infer.py @@ -4,9 +4,10 @@ import paddle.v2 as paddle from itertools import groupby +import distutils.util import argparse import gzip -import audio_data_utils +from audio_data_utils import DataGenerator from model import deep_speech2 parser = argparse.ArgumentParser( @@ -15,15 +16,42 @@ parser.add_argument( "--num_samples", default=10, type=int, - help="Number of samples for inference.") + help="Number of samples for inference. 
(default: %(default)s)") parser.add_argument( - "--num_conv_layers", default=2, type=int, help="Convolution layer number.") + "--num_conv_layers", + default=2, + type=int, + help="Convolution layer number. (default: %(default)s)") +parser.add_argument( + "--num_rnn_layers", + default=3, + type=int, + help="RNN layer number. (default: %(default)s)") +parser.add_argument( + "--rnn_layer_size", + default=512, + type=int, + help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gpu", + default=True, + type=distutils.util.strtobool, + help="Use gpu or not. (default: %(default)s)") parser.add_argument( - "--num_rnn_layers", default=3, type=int, help="RNN layer number.") + "--normalizer_manifest_path", + default='./manifest.libri.train-clean-100', + type=str, + help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( - "--rnn_layer_size", default=512, type=int, help="RNN layer cell number.") + "--decode_manifest_path", + default='./manifest.libri.test-clean', + type=str, + help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( - "--use_gpu", default=True, type=bool, help="Use gpu or not.") + "--model_filepath", + default='./params.tar.gz', + type=str, + help="Model filepath. (default: %(default)s)") args = parser.parse_args() @@ -39,18 +67,27 @@ def remove_duplicate_and_blank(id_list, blank_id): return [id for id in id_list if id != blank_id] -def max_infer(): +def best_path_decode(): """ Max-ctc-decoding for DeepSpeech2. """ + # initialize data generator + data_generator = DataGenerator( + vocab_filepath='eng_vocab.txt', + normalizer_manifest_path=args.normalizer_manifest_path, + normalizer_num_samples=200, + max_duration=20.0, + min_duration=0.0, + stride_ms=10, + window_ms=20) # create network config - _, vocab_list = audio_data_utils.get_vocabulary() - dict_size = len(vocab_list) + dict_size = data_generator.vocabulary_size() + vocab_list = data_generator.vocabulary_list() audio_data = paddle.layer.data( name="audio_spectrogram", height=161, - width=1000, - type=paddle.data_type.dense_vector(161000)) + width=2000, + type=paddle.data_type.dense_vector(322000)) text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(dict_size)) @@ -64,19 +101,17 @@ def max_infer(): # load parameters parameters = paddle.parameters.Parameters.from_tar( - gzip.open("params.tar.gz")) + gzip.open(args.model_filepath)) # prepare infer data - feeding = { - "audio_spectrogram": 0, - "transcript_text": 1, - } - test_batch_reader = audio_data_utils.padding_batch_reader( - paddle.batch( - audio_data_utils.reader_creator( - manifest_path="./libri.manifest.test", sort_by_duration=False), - batch_size=args.num_samples), - padding=[-1, 1000]) + feeding = data_generator.data_name_feeding() + test_batch_reader = data_generator.batch_reader_creator( + manifest_path=args.decode_manifest_path, + batch_size=args.num_samples, + padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=False) infer_data = test_batch_reader().next() # run max-ctc-decoding @@ -89,7 +124,7 @@ def max_infer(): # postprocess instance_length = len(max_id_results) / args.num_samples instance_list = [ - max_id_results[i:i + instance_length] + max_id_results[i * instance_length:(i + 1) * instance_length] for i in xrange(0, args.num_samples) ] for i, instance in enumerate(instance_list): @@ -102,7 +137,7 @@ def max_infer(): def main(): paddle.init(use_gpu=args.use_gpu, trainer_count=1) - max_infer() + best_path_decode() if 
__name__ == '__main__': diff --git a/librispeech.py b/librispeech.py index 8f82a2885..676bbec5c 100644 --- a/librispeech.py +++ b/librispeech.py @@ -7,6 +7,7 @@ """ import paddle.v2 as paddle +from paddle.v2.dataset.common import md5file import os import wget import tarfile @@ -14,11 +15,22 @@ import argparse import soundfile import json -DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') +DATA_HOME = os.path.expanduser('~/.cache2/paddle/dataset/speech') -URL_TEST = "http://www.openslr.org/resources/12/test-clean.tar.gz" -URL_DEV = "http://www.openslr.org/resources/12/dev-clean.tar.gz" -URL_TRAIN = "http://www.openslr.org/resources/12/train-clean-100.tar.gz" +URL_ROOT = "http://www.openslr.org/resources/12" +URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" +URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz" +URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz" +URL_DEV_OTHER = URL_ROOT + "/dev-other.tar.gz" +URL_TRAIN_CLEAN_100 = URL_ROOT + "/train-clean-100.tar.gz" +URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz" +URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz" + +MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9" +MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1" +MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522" +MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa" +MD5_TRAIN_CLEAN_500 = "d1a0fd59409feb2c614ce4d30c387708" parser = argparse.ArgumentParser( description='Downloads and prepare LibriSpeech dataset.') @@ -26,27 +38,33 @@ parser.add_argument( "--target_dir", default=DATA_HOME + "/Libri", type=str, - help="Directory to save the dataset.") + help="Directory to save the dataset. (default: %(default)s)") parser.add_argument( - "--manifest", - default="./libri.manifest", + "--manifest_prefix", + default="manifest.libri", type=str, - help="Filepath prefix for output manifests.") + help="Filepath prefix for output manifests. (default: %(default)s)") args = parser.parse_args() -def download(url, target_dir): - if not os.path.exists(target_dir): - os.makedirs(target_dir) +def download(url, md5sum, target_dir): + """ + Download file from url to target_dir, and check md5sum. + """ + if not os.path.exists(target_dir): os.makedirs(target_dir) filepath = os.path.join(target_dir, url.split("/")[-1]) - if not os.path.exists(filepath): + if not (os.path.exists(filepath) and md5file(filepath) == md5sum): print("Downloading %s ..." % url) wget.download(url, target_dir) - print("") + print("\nMD5 Chesksum %s ..." % filepath) + assert md5file(filepath) == md5sum, "MD5 checksum failed." return filepath def unpack(filepath, target_dir): + """ + Unpack the file to the target_dir. + """ print("Unpacking %s ..." % filepath) tar = tarfile.open(filepath) tar.extractall(target_dir) @@ -55,6 +73,14 @@ def unpack(filepath, target_dir): def create_manifest(data_dir, manifest_path): + """ + Create a manifest file summarizing the dataset (list of filepath and meta + data). + + Each line of the manifest contains one audio clip filepath, its + transcription text string, and its duration. Manifest file servers as a + unified interfance to organize data sets. + """ print("Creating manifest %s ..." 
% manifest_path) json_lines = [] for subfolder, _, filelist in os.walk(data_dir): @@ -81,25 +107,31 @@ def create_manifest(data_dir, manifest_path): out_file.write(line + '\n') -def prepare_dataset(url, target_dir, manifest_path): - filepath = download(url, target_dir) +def prepare_dataset(url, md5sum, target_dir, manifest_path): + """ + Download, unpack and create summmary manifest file. + """ + filepath = download(url, md5sum, target_dir) unpacked_dir = unpack(filepath, target_dir) create_manifest(unpacked_dir, manifest_path) def main(): prepare_dataset( - url=URL_TEST, - target_dir=os.path.join(args.target_dir), - manifest_path=args.manifest + ".test") + url=URL_TEST_CLEAN, + md5sum=MD5_TEST_CLEAN, + target_dir=os.path.join(args.target_dir, "test-clean"), + manifest_path=args.manifest_prefix + ".test-clean") prepare_dataset( - url=URL_DEV, - target_dir=os.path.join(args.target_dir), - manifest_path=args.manifest + ".dev") + url=URL_DEV_CLEAN, + md5sum=MD5_DEV_CLEAN, + target_dir=os.path.join(args.target_dir, "dev-clean"), + manifest_path=args.manifest_prefix + ".dev-clean") prepare_dataset( - url=URL_TRAIN, - target_dir=os.path.join(args.target_dir), - manifest_path=args.manifest + ".train") + url=URL_TRAIN_CLEAN_100, + md5sum=MD5_TRAIN_CLEAN_100, + target_dir=os.path.join(args.target_dir, "train-clean-100"), + manifest_path=args.manifest_prefix + ".train-clean-100") if __name__ == '__main__': diff --git a/model.py b/model.py index de6357f42..6b396900e 100644 --- a/model.py +++ b/model.py @@ -24,45 +24,23 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, return paddle.layer.batch_norm(input=conv_layer, act=act) -def bidirectonal_simple_rnn_bn_layer(name, input, size, act): +def bidirectional_simple_rnn_bn_layer(name, input, size, act): """ - Bidirectonal simple rnn layer with batch normalization. - The batch normalization is only performed on input-state projection - (sequence-wise normalization). - - Question: does mean and variance statistics computed over the whole sequence - or just on each individual time steps? + Bidirectonal simple rnn layer with sequence-wise batch normalization. + The batch normalization is only performed on input-state weights. """ - - def __simple_rnn_step__(input): - last_state = paddle.layer.memory(name=name + "_state", size=size) - input_fc = paddle.layer.fc( - input=input, - size=size, - act=paddle.activation.Linear(), - bias_attr=False) - # batch norm is only performed on input-state projection - input_fc_bn = paddle.layer.batch_norm( - input=input_fc, act=paddle.activation.Linear()) - state_fc = paddle.layer.fc( - input=last_state, - size=size, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.addto( - name=name + "_state", input=[input_fc_bn, state_fc], act=act) - - forward = paddle.layer.recurrent_group( - step=__simple_rnn_step__, input=input) - return forward - # argument reverse is not exposed in V2 recurrent_group - #backward = paddle.layer.recurrent_group( - - -#step=__simple_rnn_step__, -#input=input, -#reverse=True) -#return paddle.layer.concat(input=[forward, backward]) + # input-hidden weights shared across bi-direcitonal rnn. 
+ input_proj = paddle.layer.fc( + input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) + # batch norm is only performed on input-state projection + input_proj_bn = paddle.layer.batch_norm( + input=input_proj, act=paddle.activation.Linear()) + # forward and backward in time + forward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn, act=act, reverse=False) + backward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn, act=act, reverse=True) + return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) def conv_group(input, num_stacks): @@ -86,7 +64,9 @@ def conv_group(input, num_stacks): stride=(1, 2), padding=(5, 10), act=paddle.activation.BRelu()) - return conv + output_num_channels = 32 + output_height = 160 // pow(2, num_stacks) + 1 + return conv, output_num_channels, output_height def rnn_group(input, size, num_stacks): @@ -95,7 +75,7 @@ def rnn_group(input, size, num_stacks): """ output = input for i in xrange(num_stacks): - output = bidirectonal_simple_rnn_bn_layer( + output = bidirectional_simple_rnn_bn_layer( name=str(i), input=output, size=size, act=paddle.activation.BRelu()) return output @@ -125,15 +105,16 @@ def deep_speech2(audio_data, :rtype: tuple of LayerOutput """ # convolution group - conv_group_output = conv_group(input=audio_data, num_stacks=num_conv_layers) + conv_group_output, conv_group_num_channels, conv_group_height = conv_group( + input=audio_data, num_stacks=num_conv_layers) # convert data form convolution feature map to sequence of vectors conv2seq = paddle.layer.block_expand( input=conv_group_output, - num_channels=32, + num_channels=conv_group_num_channels, stride_x=1, stride_y=1, block_x=1, - block_y=21) + block_y=conv_group_height) # rnn group rnn_group_output = rnn_group( input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) diff --git a/requirements.sh b/requirements.sh deleted file mode 100644 index bb1f261de..000000000 --- a/requirements.sh +++ /dev/null @@ -1,5 +0,0 @@ -pip install wget -pip install soundfile - -# For Ubuntu only -apt-get install libsndfile1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..58a93debe --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +SoundFile==0.9.0.post1 +wget==3.2 diff --git a/train.py b/train.py index 89dcf35c9..ad6e5ffd1 100644 --- a/train.py +++ b/train.py @@ -3,6 +3,7 @@ """ import paddle.v2 as paddle +import distutils.util import argparse import gzip import time @@ -17,21 +18,61 @@ parser = argparse.ArgumentParser( description='Simplified version of DeepSpeech2 trainer.') parser.add_argument( "--batch_size", default=32, type=int, help="Minibatch size.") -parser.add_argument("--trainer", default=1, type=int, help="Trainer number.") parser.add_argument( - "--num_passes", default=20, type=int, help="Training pass number.") + "--num_passes", + default=20, + type=int, + help="Training pass number. (default: %(default)s)") parser.add_argument( - "--num_conv_layers", default=3, type=int, help="Convolution layer number.") + "--num_conv_layers", + default=2, + type=int, + help="Convolution layer number. (default: %(default)s)") parser.add_argument( - "--num_rnn_layers", default=5, type=int, help="RNN layer number.") + "--num_rnn_layers", + default=3, + type=int, + help="RNN layer number. (default: %(default)s)") parser.add_argument( - "--rnn_layer_size", default=512, type=int, help="RNN layer cell number.") + "--rnn_layer_size", + default=512, + type=int, + help="RNN layer cell number. 
(default: %(default)s)") parser.add_argument( - "--use_gpu", default=True, type=bool, help="Use gpu or not.") + "--adam_learning_rate", + default=5e-4, + type=float, + help="Learning rate for ADAM Optimizer. (default: %(default)s)") parser.add_argument( - "--use_sortagrad", default=False, type=bool, help="Use sortagrad or not.") + "--use_gpu", + default=True, + type=distutils.util.strtobool, + help="Use gpu or not. (default: %(default)s)") parser.add_argument( - "--trainer_count", default=8, type=int, help="Trainer number.") + "--use_sortagrad", + default=False, + type=distutils.util.strtobool, + help="Use sortagrad or not. (default: %(default)s)") +parser.add_argument( + "--trainer_count", + default=4, + type=int, + help="Trainer number. (default: %(default)s)") +parser.add_argument( + "--normalizer_manifest_path", + default='./manifest.libri.train-clean-100', + type=str, + help="Manifest path for normalizer. (default: %(default)s)") +parser.add_argument( + "--train_manifest_path", + default='./manifest.libri.train-clean-100', + type=str, + help="Manifest path for training. (default: %(default)s)") +parser.add_argument( + "--dev_manifest_path", + default='./manifest.libri.dev-clean', + type=str, + help="Manifest path for validation. (default: %(default)s)") args = parser.parse_args() @@ -39,37 +80,15 @@ def train(): """ DeepSpeech2 training. """ - # create data readers + # initialize data generator data_generator = DataGenerator( vocab_filepath='eng_vocab.txt', - normalizer_manifest_path='./libri.manifest.train', + normalizer_manifest_path=args.normalizer_manifest_path, normalizer_num_samples=200, max_duration=20.0, min_duration=0.0, stride_ms=10, window_ms=20) - train_batch_reader_sortagrad = data_generator.batch_reader_creator( - manifest_path='./libri.manifest.dev.small', - batch_size=args.batch_size // args.trainer, - padding_to=2000, - flatten=True, - sort_by_duration=True, - shuffle=False) - train_batch_reader_nosortagrad = data_generator.batch_reader_creator( - manifest_path='./libri.manifest.dev.small', - batch_size=args.batch_size // args.trainer, - padding_to=2000, - flatten=True, - sort_by_duration=False, - shuffle=True) - test_batch_reader = data_generator.batch_reader_creator( - manifest_path='./libri.manifest.test', - batch_size=args.batch_size // args.trainer, - padding_to=2000, - flatten=True, - sort_by_duration=False, - shuffle=False) - feeding = data_generator.data_name_feeding() # create network config dict_size = data_generator.vocabulary_size() @@ -92,28 +111,58 @@ def train(): # create parameters and optimizer parameters = paddle.parameters.create(cost) optimizer = paddle.optimizer.Adam( - learning_rate=5e-5, gradient_clipping_threshold=400) + learning_rate=args.adam_learning_rate, gradient_clipping_threshold=400) trainer = paddle.trainer.SGD( cost=cost, parameters=parameters, update_equation=optimizer) + # prepare data reader + train_batch_reader_sortagrad = data_generator.batch_reader_creator( + manifest_path=args.train_manifest_path, + batch_size=args.batch_size // args.trainer_count, + padding_to=2000, + flatten=True, + sort_by_duration=True, + shuffle=False) + train_batch_reader_nosortagrad = data_generator.batch_reader_creator( + manifest_path=args.train_manifest_path, + batch_size=args.batch_size // args.trainer_count, + padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=True) + test_batch_reader = data_generator.batch_reader_creator( + manifest_path=args.dev_manifest_path, + batch_size=args.batch_size // args.trainer_count, + 
padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=False) + feeding = data_generator.data_name_feeding() + # create event handler def event_handler(event): global start_time + global cost_sum + global cost_counter if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 10 == 0: + cost_sum += event.cost + cost_counter += 1 + if event.batch_id % 50 == 0: print "\nPass: %d, Batch: %d, TrainCost: %f" % ( - event.pass_id, event.batch_id, event.cost) + event.pass_id, event.batch_id, cost_sum / cost_counter) + cost_sum, cost_counter = 0.0, 0 + with gzip.open("params.tar.gz", 'w') as f: + parameters.to_tar(f) else: sys.stdout.write('.') sys.stdout.flush() if isinstance(event, paddle.event.BeginPass): start_time = time.time() + cost_sum, cost_counter = 0.0, 0 if isinstance(event, paddle.event.EndPass): result = trainer.test(reader=test_batch_reader, feeding=feeding) - print "\n------- Time: %d, Pass: %d, TestCost: %s" % ( + print "\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % ( time.time() - start_time, event.pass_id, result.cost) - with gzip.open("params.tar.gz", 'w') as f: - parameters.to_tar(f) # run train # first pass with sortagrad From 2a834865009ff52524a70a97f13d7d2ec78a61c9 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sat, 3 Jun 2017 14:52:02 +0800 Subject: [PATCH 008/335] Refactor decoder interfaces and add ./data directory. --- README.md | 2 + eng_vocab.txt => data/eng_vocab.txt | 0 librispeech.py => data/librispeech.py | 0 decoder.py | 60 +++++++++++++++++++++++ infer.py | 69 ++++++++++++--------------- model.py | 34 ++++++++----- train.py | 28 ++++++----- 7 files changed, 130 insertions(+), 63 deletions(-) rename eng_vocab.txt => data/eng_vocab.txt (100%) rename librispeech.py => data/librispeech.py (100%) create mode 100755 decoder.py diff --git a/README.md b/README.md index 48ee9f9a1..b20c75f97 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,9 @@ For some machines, we also need to install libsndfile1. Details to be added. ### Preparing Dataset(s) ``` +cd data python librispeech.py +cd .. ``` More help for arguments: diff --git a/eng_vocab.txt b/data/eng_vocab.txt similarity index 100% rename from eng_vocab.txt rename to data/eng_vocab.txt diff --git a/librispeech.py b/data/librispeech.py similarity index 100% rename from librispeech.py rename to data/librispeech.py diff --git a/decoder.py b/decoder.py new file mode 100755 index 000000000..7c4b95263 --- /dev/null +++ b/decoder.py @@ -0,0 +1,60 @@ +""" + CTC-like decoder utilitis. +""" + +from itertools import groupby +import numpy as np + + +def ctc_best_path_decode(probs_seq, vocabulary): + """ + Best path decoding, also called argmax decoding or greedy decoding. + Path consisting of the most probable tokens are further post-processed to + remove consecutive repetitions and all blanks. + + :param probs_seq: 2-D list of probabilities over the vocabulary for each + character. Each element is a list of float probabilities + for one character. + :type probs_seq: list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :return: Decoding result string. 
+ :rtype: baseline + """ + # dimension verification + for probs in probs_seq: + if not len(probs) == len(vocabulary) + 1: + raise ValueError("probs_seq dimension mismatchedd with vocabulary") + # argmax to get the best index for each time step + max_index_list = list(np.array(probs_seq).argmax(axis=1)) + # remove consecutive duplicate indexes + index_list = [index_group[0] for index_group in groupby(max_index_list)] + # remove blank indexes + blank_index = len(vocabulary) + index_list = [index for index in index_list if index != blank_index] + # convert index list to string + return ''.join([vocabulary[index] for index in index_list]) + + +def ctc_decode(probs_seq, vocabulary, method): + """ + CTC-like sequence decoding from a sequence of likelihood probablilites. + + :param probs_seq: 2-D list of probabilities over the vocabulary for each + character. Each element is a list of float probabilities + for one character. + :type probs_seq: list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param method: Decoding method name, with options: "best_path". + :type method: basestring + :return: Decoding result string. + :rtype: baseline + """ + for prob_list in probs_seq: + if not len(prob_list) == len(vocabulary) + 1: + raise ValueError("probs dimension mismatchedd with vocabulary") + if method == "best_path": + return ctc_best_path_decode(probs_seq, vocabulary) + else: + raise ValueError("Decoding method [%s] is not supported.") diff --git a/infer.py b/infer.py index 1c52c98fd..598c348b0 100644 --- a/infer.py +++ b/infer.py @@ -3,12 +3,12 @@ """ import paddle.v2 as paddle -from itertools import groupby import distutils.util import argparse import gzip from audio_data_utils import DataGenerator from model import deep_speech2 +from decoder import ctc_decode parser = argparse.ArgumentParser( description='Simplified version of DeepSpeech2 inference.') @@ -39,12 +39,12 @@ parser.add_argument( help="Use gpu or not. (default: %(default)s)") parser.add_argument( "--normalizer_manifest_path", - default='./manifest.libri.train-clean-100', + default='data/manifest.libri.train-clean-100', type=str, help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", - default='./manifest.libri.test-clean', + default='data/manifest.libri.test-clean', type=str, help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( @@ -52,34 +52,28 @@ parser.add_argument( default='./params.tar.gz', type=str, help="Model filepath. (default: %(default)s)") +parser.add_argument( + "--vocab_filepath", + default='data/eng_vocab.txt', + type=str, + help="Vocabulary filepath. (default: %(default)s)") args = parser.parse_args() -def remove_duplicate_and_blank(id_list, blank_id): - """ - Postprocessing for max-ctc-decoder. - - remove consecutive duplicate tokens. - - remove blanks. - """ - # remove consecutive duplicate tokens - id_list = [x[0] for x in groupby(id_list)] - # remove blanks - return [id for id in id_list if id != blank_id] - - -def best_path_decode(): +def infer(): """ Max-ctc-decoding for DeepSpeech2. 
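To make the best-path rule concrete, here is a toy decode over a two-character vocabulary. The probabilities are invented for illustration; as in the decoder above, the blank is the last index (equal to the vocabulary size).

```
import numpy as np
from itertools import groupby

vocabulary = ['a', 'b']          # index 2 is the CTC blank
probs_seq = np.array([
    [0.6, 0.3, 0.1],             # t=0 -> 'a'
    [0.5, 0.2, 0.3],             # t=1 -> 'a' (repeat, collapsed)
    [0.1, 0.1, 0.8],             # t=2 -> blank (dropped)
    [0.2, 0.7, 0.1],             # t=3 -> 'b'
])
best = probs_seq.argmax(axis=1)                # [0, 0, 2, 1]
collapsed = [key for key, _ in groupby(best)]  # [0, 2, 1]
decoded = ''.join(vocabulary[i] for i in collapsed if i != len(vocabulary))
print(decoded)                                 # ab
```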
""" # initialize data generator data_generator = DataGenerator( - vocab_filepath='eng_vocab.txt', + vocab_filepath=args.vocab_filepath, normalizer_manifest_path=args.normalizer_manifest_path, normalizer_num_samples=200, max_duration=20.0, min_duration=0.0, stride_ms=10, window_ms=20) + # create network config dict_size = data_generator.vocabulary_size() vocab_list = data_generator.vocabulary_list() @@ -91,13 +85,14 @@ def best_path_decode(): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(dict_size)) - _, max_id = deep_speech2( + output_probs = deep_speech2( audio_data=audio_data, text_data=text_data, dict_size=dict_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size) + rnn_size=args.rnn_layer_size, + is_inference=True) # load parameters parameters = paddle.parameters.Parameters.from_tar( @@ -114,30 +109,28 @@ def best_path_decode(): shuffle=False) infer_data = test_batch_reader().next() - # run max-ctc-decoding - max_id_results = paddle.infer( - output_layer=max_id, - parameters=parameters, - input=infer_data, - field=['id']) - - # postprocess - instance_length = len(max_id_results) / args.num_samples - instance_list = [ - max_id_results[i * instance_length:(i + 1) * instance_length] - for i in xrange(0, args.num_samples) + # run inference + infer_results = paddle.infer( + output_layer=output_probs, parameters=parameters, input=infer_data) + num_steps = len(infer_results) / len(infer_data) + probs_split = [ + infer_results[i * num_steps:(i + 1) * num_steps] + for i in xrange(0, len(infer_data)) ] - for i, instance in enumerate(instance_list): - id_list = remove_duplicate_and_blank(instance, dict_size) - output_transcript = ''.join([vocab_list[id] for id in id_list]) - target_transcript = ''.join([vocab_list[id] for id in infer_data[i][1]]) - print("Target Transcript: %s \nOutput Transcript: %s \n" % - (target_transcript, output_transcript)) + + # decode and print + for i, probs in enumerate(probs_split): + output_transcription = ctc_decode( + probs_seq=probs, vocabulary=vocab_list, method="best_path") + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + print("Target Transcription: %s \nOutput Transcription: %s \n" % + (target_transcription, output_transcription)) def main(): paddle.init(use_gpu=args.use_gpu, trainer_count=1) - best_path_decode() + infer() if __name__ == '__main__': diff --git a/model.py b/model.py index 6b396900e..13ff829b9 100644 --- a/model.py +++ b/model.py @@ -85,7 +85,8 @@ def deep_speech2(audio_data, dict_size, num_conv_layers=2, num_rnn_layers=3, - rnn_size=256): + rnn_size=256, + is_inference=False): """ The whole DeepSpeech2 model structure (a simplified version). @@ -101,7 +102,12 @@ def deep_speech2(audio_data, :type num_rnn_layers: int :param rnn_size: RNN layer size (number of RNN cells). :type rnn_size: int - :return: Tuple of the cost layer and the max_id decoder layer. + :param is_inference: False in the training mode, and True in the + inferene mode. + :type is_inference: bool + :return: If is_inference set False, return a ctc cost layer; + if is_inference set True, return a sequence layer of output + probability distribution. 
:rtype: tuple of LayerOutput """ # convolution group @@ -118,19 +124,21 @@ def deep_speech2(audio_data, # rnn group rnn_group_output = rnn_group( input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) - # output token distribution fc = paddle.layer.fc( input=rnn_group_output, size=dict_size + 1, act=paddle.activation.Linear(), bias_attr=True) - # ctc cost - cost = paddle.layer.warp_ctc( - input=fc, - label=text_data, - size=dict_size + 1, - blank=dict_size, - norm_by_times=True) - # max decoder - max_id = paddle.layer.max_id(input=fc) - return cost, max_id + if is_inference: + # probability distribution with softmax + return paddle.layer.mixed( + input=paddle.layer.identity_projection(input=fc), + act=paddle.activation.Softmax()) + else: + # ctc cost + return paddle.layer.warp_ctc( + input=fc, + label=text_data, + size=dict_size + 1, + blank=dict_size, + norm_by_times=True) diff --git a/train.py b/train.py index ad6e5ffd1..e6a7d076b 100644 --- a/train.py +++ b/train.py @@ -60,19 +60,24 @@ parser.add_argument( help="Trainer number. (default: %(default)s)") parser.add_argument( "--normalizer_manifest_path", - default='./manifest.libri.train-clean-100', + default='data/manifest.libri.train-clean-100', type=str, help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--train_manifest_path", - default='./manifest.libri.train-clean-100', + default='data/manifest.libri.train-clean-100', type=str, help="Manifest path for training. (default: %(default)s)") parser.add_argument( "--dev_manifest_path", - default='./manifest.libri.dev-clean', + default='data/manifest.libri.dev-clean', type=str, help="Manifest path for validation. (default: %(default)s)") +parser.add_argument( + "--vocab_filepath", + default='data/eng_vocab.txt', + type=str, + help="Vocabulary filepath. 
(default: %(default)s)") args = parser.parse_args() @@ -82,7 +87,7 @@ def train(): """ # initialize data generator data_generator = DataGenerator( - vocab_filepath='eng_vocab.txt', + vocab_filepath=args.vocab_filepath, normalizer_manifest_path=args.normalizer_manifest_path, normalizer_num_samples=200, max_duration=20.0, @@ -100,13 +105,14 @@ def train(): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(dict_size)) - cost, _ = deep_speech2( + cost = deep_speech2( audio_data=audio_data, text_data=text_data, dict_size=dict_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size) + rnn_size=args.rnn_layer_size, + is_inference=False) # create parameters and optimizer parameters = paddle.parameters.create(cost) @@ -118,21 +124,21 @@ def train(): # prepare data reader train_batch_reader_sortagrad = data_generator.batch_reader_creator( manifest_path=args.train_manifest_path, - batch_size=args.batch_size // args.trainer_count, + batch_size=args.batch_size, padding_to=2000, flatten=True, sort_by_duration=True, shuffle=False) train_batch_reader_nosortagrad = data_generator.batch_reader_creator( manifest_path=args.train_manifest_path, - batch_size=args.batch_size // args.trainer_count, + batch_size=args.batch_size, padding_to=2000, flatten=True, sort_by_duration=False, shuffle=True) test_batch_reader = data_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, - batch_size=args.batch_size // args.trainer_count, + batch_size=args.batch_size, padding_to=2000, flatten=True, sort_by_duration=False, @@ -141,9 +147,7 @@ def train(): # create event handler def event_handler(event): - global start_time - global cost_sum - global cost_counter + global start_time, cost_sum, cost_counter if isinstance(event, paddle.event.EndIteration): cost_sum += event.cost cost_counter += 1 From 730d5c4dd3fa3583202189636ff852f78a76b5da Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sat, 3 Jun 2017 15:18:33 +0800 Subject: [PATCH 009/335] Update DS2 README.md and fix bug in librispeech.py --- README.md | 10 +++++----- data/librispeech.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b20c75f97..bb1815c00 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ # Deep Speech 2 on PaddlePaddle -## Quick Start +## Installation -### Installation - -Please replace `$PADDLE_INSTALL_DIR` with your paddle installation directory. +Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. ``` pip install -r requirements.txt @@ -13,7 +11,9 @@ export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/li For some machines, we also need to install libsndfile1. Details to be added. 
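On Ubuntu, for example, `apt-get install libsndfile1` should be enough.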
-### Preparing Dataset(s) +## Usage + +### Preparing Data ``` cd data diff --git a/data/librispeech.py b/data/librispeech.py index 676bbec5c..838fee597 100644 --- a/data/librispeech.py +++ b/data/librispeech.py @@ -15,7 +15,7 @@ import argparse import soundfile import json -DATA_HOME = os.path.expanduser('~/.cache2/paddle/dataset/speech') +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') URL_ROOT = "http://www.openslr.org/resources/12" URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" @@ -30,7 +30,7 @@ MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9" MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1" MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522" MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa" -MD5_TRAIN_CLEAN_500 = "d1a0fd59409feb2c614ce4d30c387708" +MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708" parser = argparse.ArgumentParser( description='Downloads and prepare LibriSpeech dataset.') From d2e467385d8367ac072a7d98688466d74661cc4b Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 5 Jun 2017 21:00:15 +0800 Subject: [PATCH 010/335] Add loading model function for train.py. --- train.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index e6a7d076b..14c7cf637 100644 --- a/train.py +++ b/train.py @@ -11,6 +11,7 @@ import sys from model import deep_speech2 from audio_data_utils import DataGenerator import numpy as np +import os #TODO: add WER metric @@ -78,6 +79,11 @@ parser.add_argument( default='data/eng_vocab.txt', type=str, help="Vocabulary filepath. (default: %(default)s)") +parser.add_argument( + "--init_model_path", + default='models/params.tar.gz', + type=str, + help="Model path for initialization. (default: %(default)s)") args = parser.parse_args() @@ -114,8 +120,13 @@ def train(): rnn_size=args.rnn_layer_size, is_inference=False) - # create parameters and optimizer - parameters = paddle.parameters.create(cost) + # create/load parameters and optimizer + if args.init_model_path is None: + parameters = paddle.parameters.create(cost) + else: + assert os.path.isfile(args.init_model_path), "Invalid model." + parameters = paddle.parameters.Parameters.from_tar( + gzip.open(args.init_model_path)) optimizer = paddle.optimizer.Adam( learning_rate=args.adam_learning_rate, gradient_clipping_threshold=400) trainer = paddle.trainer.SGD( From 7c85e0fdb5ffac76df6f3d99519e344be7c9b5dd Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 7 Jun 2017 16:37:13 +0800 Subject: [PATCH 011/335] Support variable input batch and sortagrad. --- audio_data_utils.py | 56 +++++++++++++++++++++++++++++------------ train.py | 61 ++++++++++++++++----------------------------- 2 files changed, 62 insertions(+), 55 deletions(-) diff --git a/audio_data_utils.py b/audio_data_utils.py index c717bcf18..abb7f1e99 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -8,6 +8,7 @@ import json import random import soundfile import numpy as np +import itertools import os RANDOM_SEED = 0 @@ -62,6 +63,7 @@ class DataGenerator(object): self.__stride_ms__ = stride_ms self.__window_ms__ = window_ms self.__max_frequency__ = max_frequency + self.__epoc__ = 0 self.__random__ = random.Random(RANDOM_SEED) # load vocabulary (dictionary) self.__vocab_dict__, self.__vocab_list__ = \ @@ -245,9 +247,33 @@ class DataGenerator(object): new_batch.append((padded_audio, text)) return new_batch + def __batch_shuffle__(self, manifest, batch_size): + """ + 1. Sort the audio clips by duration. + 2. 
Generate a random number `k`, k in [0, batch_size). + 3. Randomly remove `k` instances in order to make different mini-batches, + then make minibatches and each minibatch size is batch_size. + 4. Shuffle the minibatches. + + :param manifest: manifest file. + :type manifest: list + :param batch_size: batch size. + :type batch_size: int + """ + manifest.sort(key=lambda x: x["duration"]) + shift_len = self.__random__.randint(0, batch_size - 1) + batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) + self.__random__.shuffle(batch_manifest) + batch_manifest = list(sum(batch_manifest, ())) + res_len = len(manifest) - shift_len - len(batch_manifest) + batch_manifest.extend(manifest[-res_len:]) + batch_manifest.extend(manifest[0:shift_len]) + return batch_manifest + def instance_reader_creator(self, manifest_path, - sort_by_duration=True, + batch_size, + sortagrad=True, shuffle=False): """ Instance reader creator for audio data. Creat a callable function to @@ -258,18 +284,14 @@ class DataGenerator(object): :param manifest_path: Filepath of manifest for audio clip files. :type manifest_path: basestring - :param sort_by_duration: Sort the audio clips by duration if set True - (for SortaGrad). - :type sort_by_duration: bool + :param sortagrad: Sort the audio clips by duration in the first epoc + if set True. + :type sortagrad: bool :param shuffle: Shuffle the audio clips if set True. :type shuffle: bool :return: Data reader function. :rtype: callable """ - if sort_by_duration and shuffle: - sort_by_duration = False - logger.warn("When shuffle set to true, " - "sort_by_duration is forced to set False.") def reader(): # read manifest @@ -278,16 +300,17 @@ class DataGenerator(object): max_duration=self.__max_duration__, min_duration=self.__min_duration__) # sort (by duration) or shuffle manifest - if sort_by_duration: + if self.__epoc__ == 0 and sortagrad: manifest.sort(key=lambda x: x["duration"]) - if shuffle: - self.__random__.shuffle(manifest) + elif shuffle: + manifest = self.__batch_shuffle__(manifest, batch_size) # extract spectrogram feature for instance in manifest: spectrogram = self.__audio_featurize__( instance["audio_filepath"]) transcript = self.__text_featurize__(instance["text"]) yield (spectrogram, transcript) + self.__epoc__ += 1 return reader @@ -296,7 +319,7 @@ class DataGenerator(object): batch_size, padding_to=-1, flatten=False, - sort_by_duration=True, + sortagrad=False, shuffle=False): """ Batch data reader creator for audio data. Creat a callable function to @@ -317,9 +340,9 @@ class DataGenerator(object): :param flatten: If set True, audio data will be flatten to be a 1-dim ndarray. Otherwise, 2-dim ndarray. Default is False. :type flatten: bool - :param sort_by_duration: Sort the audio clips by duration if set True - (for SortaGrad). - :type sort_by_duration: bool + :param sortagrad: Sort the audio clips by duration in the first epoc + if set True. + :type sortagrad: bool :param shuffle: Shuffle the audio clips if set True. :type shuffle: bool :return: Batch reader function, producing batches of data when called. @@ -329,7 +352,8 @@ class DataGenerator(object): def batch_reader(): instance_reader = self.instance_reader_creator( manifest_path=manifest_path, - sort_by_duration=sort_by_duration, + batch_size=batch_size, + sortagrad=sortagrad, shuffle=shuffle) batch = [] for instance in instance_reader(): diff --git a/train.py b/train.py index e6a7d076b..55577b0d8 100644 --- a/train.py +++ b/train.py @@ -85,23 +85,27 @@ def train(): """ DeepSpeech2 training. 
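Stripped of the reader plumbing, the epoch-wise batch shuffle described above works as in the sketch below. The toy durations and the helper name are illustrative only.

```
import random

def batch_shuffle(manifest, batch_size, rng):
    # 1. sort by duration; 2. drop a random prefix of k < batch_size items;
    # 3. cut the rest into full batches and shuffle the batches;
    # 4. re-attach the leftover tail and the dropped prefix so nothing is lost.
    manifest = sorted(manifest, key=lambda x: x["duration"])
    k = rng.randint(0, batch_size - 1)
    batches = list(zip(*[iter(manifest[k:])] * batch_size))
    rng.shuffle(batches)
    shuffled = [item for batch in batches for item in batch]
    leftover = len(manifest) - k - len(shuffled)
    if leftover > 0:
        shuffled.extend(manifest[-leftover:])
    shuffled.extend(manifest[:k])
    return shuffled

toy = [{"duration": d} for d in [3.2, 1.1, 2.5, 0.9, 4.0, 2.0, 1.7]]
print(batch_shuffle(toy, batch_size=2, rng=random.Random(0)))
```

The effect is that each minibatch still contains clips of similar duration (so little padding is wasted), while the order of the minibatches changes from epoch to epoch.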
""" + # initialize data generator - data_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - normalizer_manifest_path=args.normalizer_manifest_path, - normalizer_num_samples=200, - max_duration=20.0, - min_duration=0.0, - stride_ms=10, - window_ms=20) + def data_generator(): + return DataGenerator( + vocab_filepath=args.vocab_filepath, + normalizer_manifest_path=args.normalizer_manifest_path, + normalizer_num_samples=200, + max_duration=20.0, + min_duration=0.0, + stride_ms=10, + window_ms=20) + train_generator = data_generator() + test_generator = data_generator() # create network config - dict_size = data_generator.vocabulary_size() + dict_size = train_generator.vocabulary_size() + # paddle.data_type.dense_array is used for variable batch input. + # the size 161 * 161 is only an placeholder value and the real shape + # of input batch data will be set at each batch. audio_data = paddle.layer.data( - name="audio_spectrogram", - height=161, - width=2000, - type=paddle.data_type.dense_vector(322000)) + name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(dict_size)) @@ -122,28 +126,16 @@ def train(): cost=cost, parameters=parameters, update_equation=optimizer) # prepare data reader - train_batch_reader_sortagrad = data_generator.batch_reader_creator( - manifest_path=args.train_manifest_path, - batch_size=args.batch_size, - padding_to=2000, - flatten=True, - sort_by_duration=True, - shuffle=False) - train_batch_reader_nosortagrad = data_generator.batch_reader_creator( + train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest_path, batch_size=args.batch_size, - padding_to=2000, - flatten=True, - sort_by_duration=False, + sortagrad=True, shuffle=True) - test_batch_reader = data_generator.batch_reader_creator( + test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, - padding_to=2000, - flatten=True, - sort_by_duration=False, shuffle=False) - feeding = data_generator.data_name_feeding() + feeding = train_generator.data_name_feeding() # create event handler def event_handler(event): @@ -169,17 +161,8 @@ def train(): time.time() - start_time, event.pass_id, result.cost) # run train - # first pass with sortagrad - if args.use_sortagrad: - trainer.train( - reader=train_batch_reader_sortagrad, - event_handler=event_handler, - num_passes=1, - feeding=feeding) - args.num_passes -= 1 - # other passes without sortagrad trainer.train( - reader=train_batch_reader_nosortagrad, + reader=train_batch_reader, event_handler=event_handler, num_passes=args.num_passes, feeding=feeding) From d3eeb7fd76f8b9f86ca01e80f524dde652211428 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 7 Jun 2017 17:44:11 +0800 Subject: [PATCH 012/335] Refine librispeech.py for DeepSpeech2. Summary: 1. Add manifest line check. 2. Avoid re-unpacking if unpacked data already exists. 3. Add full_download (download all 7 sub-datasets of LibriSpeech). --- README.md | 5 ++- data/librispeech.py | 90 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 80 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index bb1815c00..403511d58 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ For some machines, we also need to install libsndfile1. Details to be added. ``` cd data python librispeech.py +cat manifest.libri.train-* > manifest.libri.train-all cd .. 
``` @@ -32,13 +33,13 @@ python librispeech.py --help For GPU Training: ``` -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --trainer_count 4 +CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --trainer_count 4 --train_manifest_path ./data/manifest.libri.train-all ``` For CPU Training: ``` -python train.py --trainer_count 8 --use_gpu False +python train.py --trainer_count 8 --use_gpu False -- train_manifest_path ./data/manifest.libri.train-all ``` More help for arguments: diff --git a/data/librispeech.py b/data/librispeech.py index 838fee597..8bc33575e 100644 --- a/data/librispeech.py +++ b/data/librispeech.py @@ -1,13 +1,15 @@ """ - Download, unpack and create manifest for Librespeech dataset. + Download, unpack and create manifest file for the Librespeech dataset. - Manifest is a json file with each line containing one audio clip filepath, - its transcription text string, and its duration. It servers as a unified - interfance to organize different data sets. + A manifest file is a dataset summarization, with each line a json format + string containing meta data for one audio clip, including its filepath, + transcription string, and duration. It serves as a unified interface for + different data sets. """ import paddle.v2 as paddle from paddle.v2.dataset.common import md5file +import distutils.util import os import wget import tarfile @@ -27,11 +29,21 @@ URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz" URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz" MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9" +MD5_TEST_OTHER = "fb5a50374b501bb3bac4815ee91d3135" MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1" +MD5_DEV_OTHER = "c8d0bcc9cca99d4f8b62fcc847357931" MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522" MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa" MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708" +NUM_LINES_TEST_CLEAN = 2620 +NUM_LINES_TEST_OTHER = 2939 +NUM_LINES_DEV_CLEAN = 2703 +NUM_LINES_DEV_OTHER = 2864 +NUM_LINES_TRAIN_CLEAN_100 = 28539 +NUM_LINES_TRAIN_CLEAN_360 = 104014 +NUM_LINES_TRAIN_OTHER_500 = 148688 + parser = argparse.ArgumentParser( description='Downloads and prepare LibriSpeech dataset.') parser.add_argument( @@ -44,6 +56,13 @@ parser.add_argument( default="manifest.libri", type=str, help="Filepath prefix for output manifests. (default: %(default)s)") +parser.add_argument( + "--full_download", + default="True", + type=distutils.util.strtobool, + help="Download all datasets for Librispeech." + " If False, only download a minimal requirement (test-clean, dev-clean" + " train-clean-100). (default: %(default)s)") args = parser.parse_args() @@ -57,7 +76,10 @@ def download(url, md5sum, target_dir): print("Downloading %s ..." % url) wget.download(url, target_dir) print("\nMD5 Chesksum %s ..." % filepath) - assert md5file(filepath) == md5sum, "MD5 checksum failed." + if not md5file(filepath) == md5sum: + raise RuntimeError("MD5 checksum failed.") + else: + print("File exists, skip downloading. (%s)" % filepath) return filepath @@ -69,7 +91,6 @@ def unpack(filepath, target_dir): tar = tarfile.open(filepath) tar.extractall(target_dir) tar.close() - return target_dir def create_manifest(data_dir, manifest_path): @@ -83,7 +104,7 @@ def create_manifest(data_dir, manifest_path): """ print("Creating manifest %s ..." 
% manifest_path) json_lines = [] - for subfolder, _, filelist in os.walk(data_dir): + for subfolder, _, filelist in sorted(os.walk(data_dir)): text_filelist = [ filename for filename in filelist if filename.endswith('trans.txt') ] @@ -107,13 +128,28 @@ def create_manifest(data_dir, manifest_path): out_file.write(line + '\n') -def prepare_dataset(url, md5sum, target_dir, manifest_path): +def verify_file_line_number(filepath, num_lines): + with open(filepath, 'r') as file: + return len(file.readlines()) == num_lines + + +def prepare_dataset(url, md5sum, target_dir, manifest_path, num_lines): """ Download, unpack and create summmary manifest file. """ + # download filepath = download(url, md5sum, target_dir) - unpacked_dir = unpack(filepath, target_dir) - create_manifest(unpacked_dir, manifest_path) + # unpack + if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): + unpack(filepath, target_dir) + else: + print("Unpacked data exists, skip unpacking.") + # create manifest and verify line number + create_manifest(target_dir, manifest_path) + if not verify_file_line_number(manifest_path, num_lines): + raise RuntimeError("Manifest line number check failed. " + "Please remove directory and try running the script " + "again.") def main(): @@ -121,17 +157,45 @@ def main(): url=URL_TEST_CLEAN, md5sum=MD5_TEST_CLEAN, target_dir=os.path.join(args.target_dir, "test-clean"), - manifest_path=args.manifest_prefix + ".test-clean") + manifest_path=args.manifest_prefix + ".test-clean", + num_lines=NUM_LINES_TEST_CLEAN) prepare_dataset( url=URL_DEV_CLEAN, md5sum=MD5_DEV_CLEAN, target_dir=os.path.join(args.target_dir, "dev-clean"), - manifest_path=args.manifest_prefix + ".dev-clean") + manifest_path=args.manifest_prefix + ".dev-clean", + num_lines=NUM_LINES_DEV_CLEAN) prepare_dataset( url=URL_TRAIN_CLEAN_100, md5sum=MD5_TRAIN_CLEAN_100, target_dir=os.path.join(args.target_dir, "train-clean-100"), - manifest_path=args.manifest_prefix + ".train-clean-100") + manifest_path=args.manifest_prefix + ".train-clean-100", + num_lines=NUM_LINES_TRAIN_CLEAN_100) + if args.full_download: + prepare_dataset( + url=URL_TEST_OTHER, + md5sum=MD5_TEST_OTHER, + target_dir=os.path.join(args.target_dir, "test-other"), + manifest_path=args.manifest_prefix + ".test-other", + num_lines=NUM_LINES_TEST_OTHER) + prepare_dataset( + url=URL_DEV_OTHER, + md5sum=MD5_DEV_OTHER, + target_dir=os.path.join(args.target_dir, "dev-other"), + manifest_path=args.manifest_prefix + ".dev-other", + num_lines=NUM_LINES_DEV_OTHER) + prepare_dataset( + url=URL_TRAIN_CLEAN_360, + md5sum=MD5_TRAIN_CLEAN_360, + target_dir=os.path.join(args.target_dir, "train-clean-360"), + manifest_path=args.manifest_prefix + ".train-clean-360", + num_lines=NUM_LINES_TRAIN_CLEAN_360) + prepare_dataset( + url=URL_TRAIN_OTHER_500, + md5sum=MD5_TRAIN_OTHER_500, + target_dir=os.path.join(args.target_dir, "train-other-500"), + manifest_path=args.manifest_prefix + ".train-other-500", + num_lines=NUM_LINES_TRAIN_OTHER_500) if __name__ == '__main__': From f49eab5fec2b478a7822f6459e4a8e7023f65df1 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 7 Jun 2017 19:11:21 +0800 Subject: [PATCH 013/335] Change assert to exception raising. --- train.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index 14c7cf637..89ab23c68 100644 --- a/train.py +++ b/train.py @@ -81,9 +81,11 @@ parser.add_argument( help="Vocabulary filepath. 
(default: %(default)s)") parser.add_argument( "--init_model_path", - default='models/params.tar.gz', + default=None, type=str, - help="Model path for initialization. (default: %(default)s)") + help="If set None, the training will start from scratch. " + "Otherwise, the training will resume from " + "the existing model of this path. (default: %(default)s)") args = parser.parse_args() @@ -124,7 +126,8 @@ def train(): if args.init_model_path is None: parameters = paddle.parameters.create(cost) else: - assert os.path.isfile(args.init_model_path), "Invalid model." + if not os.path.isfile(args.init_model_path): + raise IOError("Invalid model!") parameters = paddle.parameters.Parameters.from_tar( gzip.open(args.init_model_path)) optimizer = paddle.optimizer.Adam( From cc2a4d4e3df7eece1430d9a9ffdda9f104509154 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 8 Jun 2017 17:18:38 +0800 Subject: [PATCH 014/335] Add error rate calculation script. --- error_rate.py | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 error_rate.py diff --git a/error_rate.py b/error_rate.py new file mode 100644 index 000000000..4739238e7 --- /dev/null +++ b/error_rate.py @@ -0,0 +1,138 @@ +# -- * -- coding: utf-8 -- * -- +import numpy as np + + +def levenshtein_distance(ref, hyp): + ref_len = len(ref) + hyp_len = len(hyp) + + # special case + if ref == hyp: + return 0 + if ref_len == 0: + return hyp_len + if hyp_len == 0: + return ref_len + + distance = np.zeros((ref_len + 1) * (hyp_len + 1), dtype=np.int64) + distance = distance.reshape((ref_len + 1, hyp_len + 1)) + + # initialization distance matrix + for j in xrange(hyp_len + 1): + distance[0][j] = j + for i in xrange(ref_len + 1): + distance[i][0] = i + + # calculate levenshtein distance + for i in xrange(1, ref_len + 1): + for j in xrange(1, hyp_len + 1): + if ref[i - 1] == hyp[j - 1]: + distance[i][j] = distance[i - 1][j - 1] + else: + s_num = distance[i - 1][j - 1] + 1 + i_num = distance[i][j - 1] + 1 + d_num = distance[i - 1][j] + 1 + distance[i][j] = min(s_num, i_num, d_num) + + return distance[ref_len][hyp_len] + + +def wer(reference, hypophysis, delimiter=' ', filter_none=True): + """ + Calculate word error rate (WER). WER is a popular evaluation metric used + in speech recognition. It compares a reference to an hypophysis and + is defined like this: + + .. math:: + WER = (Sw + Dw + Iw) / Nw + + where + + .. code-block:: text + + Sw is the number of words subsituted, + Dw is the number of words deleted, + Iw is the number of words inserted, + Nw is the number of words in the reference + + We can use levenshtein distance to calculate WER. Take an attention that + this function will truncate the beginning and ending delimiter for + reference and hypophysis sentences before calculating WER. + + :param reference: The reference sentence. + :type reference: str + :param hypophysis: The hypophysis sentence. + :type reference: str + :param delimiter: Delimiter of input sentences. + :type delimiter: char + :param filter_none: Whether to remove None value when splitting sentence. 
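A quick sanity check of the dynamic-programming edit distance defined above (`error_rate` refers to the module added in this patch):

```
from error_rate import levenshtein_distance

# "kitten" -> "sitting" needs two substitutions and one insertion.
print(levenshtein_distance('kitten', 'sitting'))  # 3
```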
+ :type filter_none: bool + :return: WER + :rtype: float + """ + + if len(reference.strip(delimiter)) == 0: + raise ValueError("Reference's word number should be greater than 0.") + + if filter_none == True: + ref_words = filter(None, reference.strip(delimiter).split(delimiter)) + hyp_words = filter(None, hypophysis.strip(delimiter).split(delimiter)) + else: + ref_words = reference.strip(delimiter).split(delimiter) + hyp_words = reference.strip(delimiter).split(delimiter) + + edit_distance = levenshtein_distance(ref_words, hyp_words) + wer = float(edit_distance) / len(ref_words) + return wer + + +def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): + """ + Calculate charactor error rate (CER). CER will compare reference text and + hypophysis text in char-level. CER is defined as: + + .. math:: + CER = (Sc + Dc + Ic) / Nc + + where + + .. code-block:: text + + Sc is the number of character substituted, + Dc is the number of deleted, + Ic is the number of inserted + Nc is the number of characters in the reference + + We can use levenshtein distance to calculate CER. Chinese input should be + encoded to unicode. + + :param reference: The reference sentence. + :type reference: str + :param hypophysis: The hypophysis sentence. + :type reference: str + :param squeeze: If set true, consecutive space character + will be squeezed to one + :type squeezed: bool + :param ignore_case: Whether ignoring character case. + :type ignore_case: bool + :param strip_char: If not set to '', strip_char in beginning and ending of + sentence will be truncated. + :type strip_char: char + :return: CER + :rtype: float + """ + if ignore_case == True: + reference = reference.lower() + hypophysis = hypophysis.lower() + if strip_char != '': + reference = reference.strip(strip_char) + hypophysis = hypophysis.strip(strip_char) + if squeeze == True: + reference = ' '.join(filter(None, reference.split(' '))) + hypophysis = ' '.join(filter(None, hypophysis.split(' '))) + + if len(reference) == 0: + raise ValueError("Length of reference should be greater than 0.") + edit_distance = levenshtein_distance(reference, hypophysis) + cer = float(edit_distance) / len(reference) + return cer From 3f63e069e098b94fc64d59ac2c297271242cb3c1 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 8 Jun 2017 21:35:17 +0800 Subject: [PATCH 015/335] Fix typos and follow comments. --- error_rate.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/error_rate.py b/error_rate.py index 4739238e7..f216177e0 100644 --- a/error_rate.py +++ b/error_rate.py @@ -14,8 +14,7 @@ def levenshtein_distance(ref, hyp): if hyp_len == 0: return ref_len - distance = np.zeros((ref_len + 1) * (hyp_len + 1), dtype=np.int64) - distance = distance.reshape((ref_len + 1, hyp_len + 1)) + distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int64) # initialization distance matrix for j in xrange(hyp_len + 1): @@ -40,7 +39,7 @@ def levenshtein_distance(ref, hyp): def wer(reference, hypophysis, delimiter=' ', filter_none=True): """ Calculate word error rate (WER). WER is a popular evaluation metric used - in speech recognition. It compares a reference to an hypophysis and + in speech recognition. It compares a reference with an hypophysis and is defined like this: .. math:: @@ -55,8 +54,8 @@ def wer(reference, hypophysis, delimiter=' ', filter_none=True): Iw is the number of words inserted, Nw is the number of words in the reference - We can use levenshtein distance to calculate WER. 
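For example, with the definitions above (values worked out by hand; `error_rate` is this module):

```
from error_rate import wer, cer

# One of four reference words is substituted -> WER = 1 / 4.
print(wer('i love deep speech', 'i like deep speech'))  # 0.25

# One of four reference characters is substituted -> CER = 1 / 4.
print(cer('abcd', 'abcc'))  # 0.25
```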
Take an attention that - this function will truncate the beginning and ending delimiter for + We can use levenshtein distance to calculate WER. Please draw an attention + that this function will truncate the beginning and ending delimiter for reference and hypophysis sentences before calculating WER. :param reference: The reference sentence. @@ -111,12 +110,12 @@ def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): :param hypophysis: The hypophysis sentence. :type reference: str :param squeeze: If set true, consecutive space character - will be squeezed to one - :type squeezed: bool - :param ignore_case: Whether ignoring character case. + will be squeezed to one + :type squeeze: bool + :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool :param strip_char: If not set to '', strip_char in beginning and ending of - sentence will be truncated. + sentence will be truncated. :type strip_char: char :return: CER :rtype: float From 06e9f713899f2118c08753bfe40bd2abf4d152b2 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 8 Jun 2017 22:20:11 +0800 Subject: [PATCH 016/335] Remove manifest's line number check from librispeech.py and update README.md. --- README.md | 4 +++ data/librispeech.py | 69 ++++++++++++++------------------------------- 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 403511d58..7a372e9be 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ cat manifest.libri.train-* > manifest.libri.train-all cd .. ``` +After running librispeech.py, we have several "manifest" json files named with a prefix `manifest.libri.`. A manifest file summarizes a speech data set, with each line containing the meta data (i.e. audio filepath, transcription text, audio duration) of each audio file within the data set, in json format. + +By `cat manifest.libri.train-* > manifest.libri.train-all`, we simply merge the three seperate sample sets of LibriSpeech (train-clean-100, train-clean-360, train-other-500) into one training set. This is a simple way for merging different data sets. + More help for arguments: ``` diff --git a/data/librispeech.py b/data/librispeech.py index 8bc33575e..653caa926 100644 --- a/data/librispeech.py +++ b/data/librispeech.py @@ -1,10 +1,9 @@ """ - Download, unpack and create manifest file for the Librespeech dataset. + Download, unpack and create manifest json files for the Librespeech dataset. - A manifest file is a dataset summarization, with each line a json format - string containing meta data for one audio clip, including its filepath, - transcription string, and duration. It serves as a unified interface for - different data sets. + A manifest is a json file summarizing filelist in a data set, with each line + containing the meta data (i.e. audio filepath, transcription text, audio + duration) of each audio file in the data set. 
""" import paddle.v2 as paddle @@ -36,14 +35,6 @@ MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522" MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa" MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708" -NUM_LINES_TEST_CLEAN = 2620 -NUM_LINES_TEST_OTHER = 2939 -NUM_LINES_DEV_CLEAN = 2703 -NUM_LINES_DEV_OTHER = 2864 -NUM_LINES_TRAIN_CLEAN_100 = 28539 -NUM_LINES_TRAIN_CLEAN_360 = 104014 -NUM_LINES_TRAIN_OTHER_500 = 148688 - parser = argparse.ArgumentParser( description='Downloads and prepare LibriSpeech dataset.') parser.add_argument( @@ -95,12 +86,9 @@ def unpack(filepath, target_dir): def create_manifest(data_dir, manifest_path): """ - Create a manifest file summarizing the dataset (list of filepath and meta - data). - - Each line of the manifest contains one audio clip filepath, its - transcription text string, and its duration. Manifest file servers as a - unified interfance to organize data sets. + Create a manifest json file summarizing the data set, with each line + containing the meta data (i.e. audio filepath, transcription text, audio + duration) of each audio file within the data set. """ print("Creating manifest %s ..." % manifest_path) json_lines = [] @@ -128,28 +116,20 @@ def create_manifest(data_dir, manifest_path): out_file.write(line + '\n') -def verify_file_line_number(filepath, num_lines): - with open(filepath, 'r') as file: - return len(file.readlines()) == num_lines - - -def prepare_dataset(url, md5sum, target_dir, manifest_path, num_lines): +def prepare_dataset(url, md5sum, target_dir, manifest_path): """ Download, unpack and create summmary manifest file. """ - # download - filepath = download(url, md5sum, target_dir) - # unpack if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): + # download + filepath = download(url, md5sum, target_dir) + # unpack unpack(filepath, target_dir) else: - print("Unpacked data exists, skip unpacking.") - # create manifest and verify line number + print("Skip downloading and unpacking. Data already exists in %s." % + target_dir) + # create manifest json file create_manifest(target_dir, manifest_path) - if not verify_file_line_number(manifest_path, num_lines): - raise RuntimeError("Manifest line number check failed. 
" - "Please remove directory and try running the script " - "again.") def main(): @@ -157,45 +137,38 @@ def main(): url=URL_TEST_CLEAN, md5sum=MD5_TEST_CLEAN, target_dir=os.path.join(args.target_dir, "test-clean"), - manifest_path=args.manifest_prefix + ".test-clean", - num_lines=NUM_LINES_TEST_CLEAN) + manifest_path=args.manifest_prefix + ".test-clean") prepare_dataset( url=URL_DEV_CLEAN, md5sum=MD5_DEV_CLEAN, target_dir=os.path.join(args.target_dir, "dev-clean"), - manifest_path=args.manifest_prefix + ".dev-clean", - num_lines=NUM_LINES_DEV_CLEAN) + manifest_path=args.manifest_prefix + ".dev-clean") prepare_dataset( url=URL_TRAIN_CLEAN_100, md5sum=MD5_TRAIN_CLEAN_100, target_dir=os.path.join(args.target_dir, "train-clean-100"), - manifest_path=args.manifest_prefix + ".train-clean-100", - num_lines=NUM_LINES_TRAIN_CLEAN_100) + manifest_path=args.manifest_prefix + ".train-clean-100") if args.full_download: prepare_dataset( url=URL_TEST_OTHER, md5sum=MD5_TEST_OTHER, target_dir=os.path.join(args.target_dir, "test-other"), - manifest_path=args.manifest_prefix + ".test-other", - num_lines=NUM_LINES_TEST_OTHER) + manifest_path=args.manifest_prefix + ".test-other") prepare_dataset( url=URL_DEV_OTHER, md5sum=MD5_DEV_OTHER, target_dir=os.path.join(args.target_dir, "dev-other"), - manifest_path=args.manifest_prefix + ".dev-other", - num_lines=NUM_LINES_DEV_OTHER) + manifest_path=args.manifest_prefix + ".dev-other") prepare_dataset( url=URL_TRAIN_CLEAN_360, md5sum=MD5_TRAIN_CLEAN_360, target_dir=os.path.join(args.target_dir, "train-clean-360"), - manifest_path=args.manifest_prefix + ".train-clean-360", - num_lines=NUM_LINES_TRAIN_CLEAN_360) + manifest_path=args.manifest_prefix + ".train-clean-360") prepare_dataset( url=URL_TRAIN_OTHER_500, md5sum=MD5_TRAIN_OTHER_500, target_dir=os.path.join(args.target_dir, "train-other-500"), - manifest_path=args.manifest_prefix + ".train-other-500", - num_lines=NUM_LINES_TRAIN_OTHER_500) + manifest_path=args.manifest_prefix + ".train-other-500") if __name__ == '__main__': From c943ca79acefc85e605a6e414e90239ee56f98be Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 4 Jun 2017 19:15:09 +0800 Subject: [PATCH 017/335] mv ctc_beam_search_decoder into deep_speech_2/ --- ctc_beam_search_decoder.py | 162 ++++++++++++++++++++++++++++++++ test_ctc_beam_search_decoder.py | 69 ++++++++++++++ 2 files changed, 231 insertions(+) create mode 100644 ctc_beam_search_decoder.py create mode 100644 test_ctc_beam_search_decoder.py diff --git a/ctc_beam_search_decoder.py b/ctc_beam_search_decoder.py new file mode 100644 index 000000000..873121b16 --- /dev/null +++ b/ctc_beam_search_decoder.py @@ -0,0 +1,162 @@ +## This is a prototype of ctc beam search decoder + +import copy +import random +import numpy as np + +# vocab = blank + space + English characters +#vocab = ['-', ' '] + [chr(i) for i in range(97, 123)] + +vocab = ['-', '_', 'a'] + + +def ids_str2list(ids_str): + ids_str = ids_str.split(' ') + ids_list = [int(elem) for elem in ids_str] + return ids_list + + +def ids_list2str(ids_list): + ids_str = [str(elem) for elem in ids_list] + ids_str = ' '.join(ids_str) + return ids_str + + +def ids_id2token(ids_list): + ids_str = '' + for ids in ids_list: + ids_str += vocab[ids] + return ids_str + + +def ctc_beam_search_decoder(input_probs_matrix, + beam_size, + max_time_steps=None, + lang_model=None, + alpha=1.0, + beta=1.0, + blank_id=0, + space_id=1, + num_results_per_sample=None): + ''' + beam search decoder for CTC-trained network, called outside of the recurrent group. 
+ adapted from Algorithm 1 in https://arxiv.org/abs/1408.2873. + + param input_probs_matrix: probs matrix for input sequence, row major + type input_probs_matrix: 2D matrix. + param beam_size: width for beam search + type beam_size: int + max_time_steps: maximum steps' number for input sequence, <=len(input_probs_matrix) + type max_time_steps: int + lang_model: language model for scoring + type lang_model: function + + ...... + + ''' + if num_results_per_sample is None: + num_results_per_sample = beam_size + assert num_results_per_sample <= beam_size + + if max_time_steps is None: + max_time_steps = len(input_probs_matrix) + else: + max_time_steps = min(max_time_steps, len(input_probs_matrix)) + assert max_time_steps > 0 + + vocab_dim = len(input_probs_matrix[0]) + assert blank_id < vocab_dim + assert space_id < vocab_dim + + ## initialize + start_id = -1 + # the set containing selected prefixes + prefix_set_prev = {str(start_id): 1.0} + probs_b, probs_nb = {str(start_id): 1.0}, {str(start_id): 0.0} + + ## extend prefix in loop + for time_step in range(max_time_steps): + # the set containing candidate prefixes + prefix_set_next = {} + probs_b_cur, probs_nb_cur = {}, {} + for l in prefix_set_prev: + prob = input_probs_matrix[time_step] + + # convert ids in string to list + ids_list = ids_str2list(l) + end_id = ids_list[-1] + if not prefix_set_next.has_key(l): + probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0 + + # extend prefix by travering vocabulary + for c in range(0, vocab_dim): + if c == blank_id: + probs_b_cur[l] += prob[c] * (probs_b[l] + probs_nb[l]) + else: + l_plus = l + ' ' + str(c) + if not prefix_set_next.has_key(l_plus): + probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 + + if c == end_id: + probs_nb_cur[l_plus] += prob[c] * probs_b[l] + probs_nb_cur[l] += prob[c] * probs_nb[l] + elif c == space_id: + lm = 1.0 if lang_model is None \ + else np.power(lang_model(ids_list), alpha) + probs_nb_cur[l_plus] += lm * prob[c] * ( + probs_b[l] + probs_nb[l]) + else: + probs_nb_cur[l_plus] += prob[c] * ( + probs_b[l] + probs_nb[l]) + # add l_plus into prefix_set_next + prefix_set_next[l_plus] = probs_nb_cur[ + l_plus] + probs_b_cur[l_plus] + # add l into prefix_set_next + prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l] + # update probs + probs_b, probs_nb = copy.deepcopy(probs_b_cur), copy.deepcopy( + probs_nb_cur) + + ## store top beam_size prefixes + prefix_set_prev = sorted( + prefix_set_next.iteritems(), key=lambda asd: asd[1], reverse=True) + if beam_size < len(prefix_set_prev): + prefix_set_prev = prefix_set_prev[:beam_size] + prefix_set_prev = dict(prefix_set_prev) + + beam_result = [] + for (seq, prob) in prefix_set_prev.items(): + if prob > 0.0: + ids_list = ids_str2list(seq) + log_prob = np.log(prob) + beam_result.append([log_prob, ids_list[1:]]) + + ## output top beam_size decoding results + beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) + if num_results_per_sample < beam_size: + beam_result = beam_result[:num_results_per_sample] + return beam_result + + +def language_model(input): + # TODO + return random.uniform(0, 1) + + +def simple_test(): + + input_probs_matrix = [[0.1, 0.3, 0.6], [0.2, 0.1, 0.7], [0.5, 0.2, 0.3]] + + beam_result = ctc_beam_search_decoder( + input_probs_matrix=input_probs_matrix, + beam_size=20, + blank_id=0, + space_id=1, ) + + print "\nbeam search output:" + for result in beam_result: + print("%6f\t%s" % (result[0], ids_id2token(result[1]))) + + +if __name__ == '__main__': + simple_test() diff --git 
a/test_ctc_beam_search_decoder.py b/test_ctc_beam_search_decoder.py new file mode 100644 index 000000000..f79704441 --- /dev/null +++ b/test_ctc_beam_search_decoder.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import +from __future__ import print_function + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +import ctc_beam_search_decoder as tested_decoder + + +def test_beam_search_decoder(): + max_time_steps = 6 + beam_size = 20 + num_results_per_sample = 20 + + input_prob_matrix_0 = np.asarray( + [ + [0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908], + [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517], + [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763], + [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655], + [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878], + # Random entry added in at time=5 + [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671] + ], + dtype=np.float32) + + # Add arbitrary offset - this is fine + input_log_prob_matrix_0 = np.log(input_prob_matrix_0) #+ 2.0 + + # len max_time_steps array of batch_size x depth matrices + inputs = ([ + input_log_prob_matrix_0[t, :][np.newaxis, :] + for t in range(max_time_steps) + ]) + + inputs_t = [ops.convert_to_tensor(x) for x in inputs] + inputs_t = array_ops.stack(inputs_t) + + # run CTC beam search decoder in tensorflow + with tf.Session() as sess: + decoded, log_probabilities = tf.nn.ctc_beam_search_decoder( + inputs_t, [max_time_steps], + beam_width=beam_size, + top_paths=num_results_per_sample, + merge_repeated=False) + tf_decoded = sess.run(decoded) + tf_log_probs = sess.run(log_probabilities) + + # run tested CTC beam search decoder + beam_result = tested_decoder.ctc_beam_search_decoder( + input_probs_matrix=input_prob_matrix_0, + beam_size=beam_size, + blank_id=5, # default blank_id in tensorflow decoder is (num classes-1) + space_id=4, # doesn't matter + max_time_steps=max_time_steps, + num_results_per_sample=num_results_per_sample) + + # compare decoding result + print( + "{tf_decoder log probs} \t {tested_decoder log probs}: {tf_decoder result} {tested_decoder result}" + ) + for index in range(len(beam_result)): + print(('%6f\t%6f: ') % (tf_log_probs[0][index], beam_result[index][0]), + tf_decoded[index].values, ' ', beam_result[index][1]) + + +if __name__ == '__main__': + test_beam_search_decoder() From cfe9d22866e4e94802f25033c6217dee8f509c6a Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 4 Jun 2017 19:19:36 +0800 Subject: [PATCH 018/335] update annotations --- ctc_beam_search_decoder.py | 54 +++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/ctc_beam_search_decoder.py b/ctc_beam_search_decoder.py index 873121b16..3223c0c25 100644 --- a/ctc_beam_search_decoder.py +++ b/ctc_beam_search_decoder.py @@ -10,12 +10,6 @@ import numpy as np vocab = ['-', '_', 'a'] -def ids_str2list(ids_str): - ids_str = ids_str.split(' ') - ids_list = [int(elem) for elem in ids_str] - return ids_list - - def ids_list2str(ids_list): ids_str = [str(elem) for elem in ids_list] ids_str = ' '.join(ids_str) @@ -39,21 +33,45 @@ def ctc_beam_search_decoder(input_probs_matrix, space_id=1, num_results_per_sample=None): ''' - beam search decoder for CTC-trained network, called outside of the recurrent group. - adapted from Algorithm 1 in https://arxiv.org/abs/1408.2873. 
+ Beam search decoder for CTC-trained network, adapted from Algorithm 1 + in https://arxiv.org/abs/1408.2873. + + :param input_probs_matrix: probs matrix for input sequence, row major + :type input_probs_matrix: 2D matrix. + :param beam_size: width for beam search + :type beam_size: int + :max_time_steps: maximum steps' number for input sequence, + <=len(input_probs_matrix) + :type max_time_steps: int + :lang_model: language model for scoring + :type lang_model: function + :param alpha: parameter associated with language model. + :type alpha: float + :param beta: parameter associated with word count + :type beta: float + :param blank_id: id of blank, default 0. + :type blank_id: int + :param space_id: id of space, default 1. + :type space_id: int + :param num_result_per_sample: the number of output decoding results + per given sample, <=beam_size. + :type num_result_per_sample: int + ''' - param input_probs_matrix: probs matrix for input sequence, row major - type input_probs_matrix: 2D matrix. - param beam_size: width for beam search - type beam_size: int - max_time_steps: maximum steps' number for input sequence, <=len(input_probs_matrix) - type max_time_steps: int - lang_model: language model for scoring - type lang_model: function + # function to convert ids in string to list + def ids_str2list(ids_str): + ids_str = ids_str.split(' ') + ids_list = [int(elem) for elem in ids_str] + return ids_list - ...... + # counting words in a character list + def word_count(ids_list): + cnt = 0 + for elem in ids_list: + if elem == space_id: + cnt += 1 + return cnt - ''' if num_results_per_sample is None: num_results_per_sample = beam_size assert num_results_per_sample <= beam_size From dedbfb2654254e6c45b32221c6c6a09c2de09f9a Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 5 Jun 2017 08:48:54 +0800 Subject: [PATCH 019/335] enable ctc beam search decoder --- ctc_beam_search_decoder.py | 30 +++++++++++++++++++++--------- decoder.py | 16 ++++++++++++++-- infer.py | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 65 insertions(+), 14 deletions(-) diff --git a/ctc_beam_search_decoder.py b/ctc_beam_search_decoder.py index 3223c0c25..f66d545aa 100644 --- a/ctc_beam_search_decoder.py +++ b/ctc_beam_search_decoder.py @@ -23,10 +23,26 @@ def ids_id2token(ids_list): return ids_str +def language_model(ids_list, vocabulary): + # lookup ptb vocabulary + ptb_vocab_path = "./data/ptb_vocab.txt" + sentence = ''.join([vocabulary[ids] for ids in ids_list]) + words = sentence.split(' ') + last_word = words[-1] + with open(ptb_vocab_path, 'r') as ptb_vocab: + f = ptb_vocab.readline() + while f: + if f == last_word: + return 1.0 + f = ptb_vocab.readline() + return 0.0 + + def ctc_beam_search_decoder(input_probs_matrix, beam_size, + vocabulary, max_time_steps=None, - lang_model=None, + lang_model=language_model, alpha=1.0, beta=1.0, blank_id=0, @@ -120,7 +136,7 @@ def ctc_beam_search_decoder(input_probs_matrix, probs_nb_cur[l] += prob[c] * probs_nb[l] elif c == space_id: lm = 1.0 if lang_model is None \ - else np.power(lang_model(ids_list), alpha) + else np.power(lang_model(ids_list, vocabulary), alpha) probs_nb_cur[l_plus] += lm * prob[c] * ( probs_b[l] + probs_nb[l]) else: @@ -145,9 +161,10 @@ def ctc_beam_search_decoder(input_probs_matrix, beam_result = [] for (seq, prob) in prefix_set_prev.items(): if prob > 0.0: - ids_list = ids_str2list(seq) + ids_list = ids_str2list(seq)[1:] + result = ''.join([vocabulary[ids] for ids in ids_list]) log_prob = np.log(prob) - beam_result.append([log_prob, ids_list[1:]]) + 
beam_result.append([log_prob, result]) ## output top beam_size decoding results beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) @@ -156,11 +173,6 @@ def ctc_beam_search_decoder(input_probs_matrix, return beam_result -def language_model(input): - # TODO - return random.uniform(0, 1) - - def simple_test(): input_probs_matrix = [[0.1, 0.3, 0.6], [0.2, 0.1, 0.7], [0.5, 0.2, 0.3]] diff --git a/decoder.py b/decoder.py index 7c4b95263..34e1715c3 100755 --- a/decoder.py +++ b/decoder.py @@ -4,6 +4,7 @@ from itertools import groupby import numpy as np +from ctc_beam_search_decoder import * def ctc_best_path_decode(probs_seq, vocabulary): @@ -36,7 +37,11 @@ def ctc_best_path_decode(probs_seq, vocabulary): return ''.join([vocabulary[index] for index in index_list]) -def ctc_decode(probs_seq, vocabulary, method): +def ctc_decode(probs_seq, + vocabulary, + method, + beam_size=None, + num_results_per_sample=None): """ CTC-like sequence decoding from a sequence of likelihood probablilites. @@ -56,5 +61,12 @@ def ctc_decode(probs_seq, vocabulary, method): raise ValueError("probs dimension mismatchedd with vocabulary") if method == "best_path": return ctc_best_path_decode(probs_seq, vocabulary) + elif method == "beam_search": + return ctc_beam_search_decoder( + input_probs_matrix=probs_seq, + vocabulary=vocabulary, + beam_size=beam_size, + blank_id=len(vocabulary), + num_results_per_sample=num_results_per_sample) else: - raise ValueError("Decoding method [%s] is not supported.") + raise ValueError("Decoding method [%s] is not supported." % method) diff --git a/infer.py b/infer.py index 598c348b0..e5ecf6f35 100644 --- a/infer.py +++ b/infer.py @@ -57,6 +57,23 @@ parser.add_argument( default='data/eng_vocab.txt', type=str, help="Vocabulary filepath. (default: %(default)s)") +parser.add_argument( + "--decode_method", + default='best_path', + type=str, + help="Method for ctc decoding, best_path or beam_search. (default: %(default)s)" +) +parser.add_argument( + "--beam_size", + default=50, + type=int, + help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--num_result_per_sample", + default=2, + type=int, + help="Number of results per given sample in beam search. 
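With the new decoding flags, a beam-search inference run could be launched roughly as follows (an illustrative command; unspecified flags keep their defaults):

```
python infer.py --decode_method beam_search --beam_size 50 --num_result_per_sample 2
```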
(default: %(default)d)" +) args = parser.parse_args() @@ -120,12 +137,22 @@ def infer(): # decode and print for i, probs in enumerate(probs_split): - output_transcription = ctc_decode( + best_path_transcription = ctc_decode( probs_seq=probs, vocabulary=vocab_list, method="best_path") target_transcription = ''.join( [vocab_list[index] for index in infer_data[i][1]]) - print("Target Transcription: %s \nOutput Transcription: %s \n" % - (target_transcription, output_transcription)) + print("\nTarget Transcription: %s \nBst_path Transcription: %s" % + (target_transcription, best_path_transcription)) + beam_search_transcription = ctc_decode( + probs_seq=probs, + vocabulary=vocab_list, + method="beam_search", + beam_size=args.beam_size, + num_results_per_sample=args.num_result_per_sample) + for index in range(len(beam_search_transcription)): + print("LM No, %d - %4f: %s " % + (index, beam_search_transcription[index][0], + beam_search_transcription[index][1])) def main(): From 51f35a53723779f042498e14786abb791d278c50 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 7 Jun 2017 02:21:35 +0800 Subject: [PATCH 020/335] code clean & add external scorer --- ctc_beam_search_decoder.py | 192 -------------------------------- decoder.py | 184 +++++++++++++++++++++++++----- infer.py | 72 ++++++++---- test_ctc_beam_search_decoder.py | 69 ------------ 4 files changed, 205 insertions(+), 312 deletions(-) delete mode 100644 ctc_beam_search_decoder.py delete mode 100644 test_ctc_beam_search_decoder.py diff --git a/ctc_beam_search_decoder.py b/ctc_beam_search_decoder.py deleted file mode 100644 index f66d545aa..000000000 --- a/ctc_beam_search_decoder.py +++ /dev/null @@ -1,192 +0,0 @@ -## This is a prototype of ctc beam search decoder - -import copy -import random -import numpy as np - -# vocab = blank + space + English characters -#vocab = ['-', ' '] + [chr(i) for i in range(97, 123)] - -vocab = ['-', '_', 'a'] - - -def ids_list2str(ids_list): - ids_str = [str(elem) for elem in ids_list] - ids_str = ' '.join(ids_str) - return ids_str - - -def ids_id2token(ids_list): - ids_str = '' - for ids in ids_list: - ids_str += vocab[ids] - return ids_str - - -def language_model(ids_list, vocabulary): - # lookup ptb vocabulary - ptb_vocab_path = "./data/ptb_vocab.txt" - sentence = ''.join([vocabulary[ids] for ids in ids_list]) - words = sentence.split(' ') - last_word = words[-1] - with open(ptb_vocab_path, 'r') as ptb_vocab: - f = ptb_vocab.readline() - while f: - if f == last_word: - return 1.0 - f = ptb_vocab.readline() - return 0.0 - - -def ctc_beam_search_decoder(input_probs_matrix, - beam_size, - vocabulary, - max_time_steps=None, - lang_model=language_model, - alpha=1.0, - beta=1.0, - blank_id=0, - space_id=1, - num_results_per_sample=None): - ''' - Beam search decoder for CTC-trained network, adapted from Algorithm 1 - in https://arxiv.org/abs/1408.2873. - - :param input_probs_matrix: probs matrix for input sequence, row major - :type input_probs_matrix: 2D matrix. - :param beam_size: width for beam search - :type beam_size: int - :max_time_steps: maximum steps' number for input sequence, - <=len(input_probs_matrix) - :type max_time_steps: int - :lang_model: language model for scoring - :type lang_model: function - :param alpha: parameter associated with language model. - :type alpha: float - :param beta: parameter associated with word count - :type beta: float - :param blank_id: id of blank, default 0. - :type blank_id: int - :param space_id: id of space, default 1. 
- :type space_id: int - :param num_result_per_sample: the number of output decoding results - per given sample, <=beam_size. - :type num_result_per_sample: int - ''' - - # function to convert ids in string to list - def ids_str2list(ids_str): - ids_str = ids_str.split(' ') - ids_list = [int(elem) for elem in ids_str] - return ids_list - - # counting words in a character list - def word_count(ids_list): - cnt = 0 - for elem in ids_list: - if elem == space_id: - cnt += 1 - return cnt - - if num_results_per_sample is None: - num_results_per_sample = beam_size - assert num_results_per_sample <= beam_size - - if max_time_steps is None: - max_time_steps = len(input_probs_matrix) - else: - max_time_steps = min(max_time_steps, len(input_probs_matrix)) - assert max_time_steps > 0 - - vocab_dim = len(input_probs_matrix[0]) - assert blank_id < vocab_dim - assert space_id < vocab_dim - - ## initialize - start_id = -1 - # the set containing selected prefixes - prefix_set_prev = {str(start_id): 1.0} - probs_b, probs_nb = {str(start_id): 1.0}, {str(start_id): 0.0} - - ## extend prefix in loop - for time_step in range(max_time_steps): - # the set containing candidate prefixes - prefix_set_next = {} - probs_b_cur, probs_nb_cur = {}, {} - for l in prefix_set_prev: - prob = input_probs_matrix[time_step] - - # convert ids in string to list - ids_list = ids_str2list(l) - end_id = ids_list[-1] - if not prefix_set_next.has_key(l): - probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0 - - # extend prefix by travering vocabulary - for c in range(0, vocab_dim): - if c == blank_id: - probs_b_cur[l] += prob[c] * (probs_b[l] + probs_nb[l]) - else: - l_plus = l + ' ' + str(c) - if not prefix_set_next.has_key(l_plus): - probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 - - if c == end_id: - probs_nb_cur[l_plus] += prob[c] * probs_b[l] - probs_nb_cur[l] += prob[c] * probs_nb[l] - elif c == space_id: - lm = 1.0 if lang_model is None \ - else np.power(lang_model(ids_list, vocabulary), alpha) - probs_nb_cur[l_plus] += lm * prob[c] * ( - probs_b[l] + probs_nb[l]) - else: - probs_nb_cur[l_plus] += prob[c] * ( - probs_b[l] + probs_nb[l]) - # add l_plus into prefix_set_next - prefix_set_next[l_plus] = probs_nb_cur[ - l_plus] + probs_b_cur[l_plus] - # add l into prefix_set_next - prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l] - # update probs - probs_b, probs_nb = copy.deepcopy(probs_b_cur), copy.deepcopy( - probs_nb_cur) - - ## store top beam_size prefixes - prefix_set_prev = sorted( - prefix_set_next.iteritems(), key=lambda asd: asd[1], reverse=True) - if beam_size < len(prefix_set_prev): - prefix_set_prev = prefix_set_prev[:beam_size] - prefix_set_prev = dict(prefix_set_prev) - - beam_result = [] - for (seq, prob) in prefix_set_prev.items(): - if prob > 0.0: - ids_list = ids_str2list(seq)[1:] - result = ''.join([vocabulary[ids] for ids in ids_list]) - log_prob = np.log(prob) - beam_result.append([log_prob, result]) - - ## output top beam_size decoding results - beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) - if num_results_per_sample < beam_size: - beam_result = beam_result[:num_results_per_sample] - return beam_result - - -def simple_test(): - - input_probs_matrix = [[0.1, 0.3, 0.6], [0.2, 0.1, 0.7], [0.5, 0.2, 0.3]] - - beam_result = ctc_beam_search_decoder( - input_probs_matrix=input_probs_matrix, - beam_size=20, - blank_id=0, - space_id=1, ) - - print "\nbeam search output:" - for result in beam_result: - print("%6f\t%s" % (result[0], ids_id2token(result[1]))) - - -if __name__ == '__main__': - 
simple_test() diff --git a/decoder.py b/decoder.py index 34e1715c3..91dbfc347 100755 --- a/decoder.py +++ b/decoder.py @@ -4,7 +4,8 @@ from itertools import groupby import numpy as np -from ctc_beam_search_decoder import * +import copy +import kenlm def ctc_best_path_decode(probs_seq, vocabulary): @@ -37,36 +38,165 @@ def ctc_best_path_decode(probs_seq, vocabulary): return ''.join([vocabulary[index] for index in index_list]) -def ctc_decode(probs_seq, - vocabulary, - method, - beam_size=None, - num_results_per_sample=None): +class Scorer(object): """ - CTC-like sequence decoding from a sequence of likelihood probablilites. + External defined scorer to evaluate a sentence in beam search + decoding, consisting of language model and word count. - :param probs_seq: 2-D list of probabilities over the vocabulary for each - character. Each element is a list of float probabilities - for one character. - :type probs_seq: list + :param alpha: Parameter associated with language model. + :type alpha: float + :param beta: Parameter associated with word count. + :type beta: float + :model_path: Path to load language model. + :type model_path: basestring + """ + + def __init__(self, alpha, beta, model_path): + + self._alpha = alpha + self._beta = beta + self._language_model = kenlm.LanguageModel(model_path) + + def language_model_score(self, sentence, bos=True, eos=False): + log_prob = self._language_model.score(sentence, bos, eos) + return np.power(10, log_prob) + + def word_count(self, sentence): + words = sentence.strip().split(' ') + return len(words) + + # execute evaluation + def evaluate(self, sentence, bos=True, eos=False): + lm = self.language_model_score(sentence, bos, eos) + word_count = self.word_count(sentence) + score = np.power(lm, self._alpha) \ + * np.power(word_count, self._beta) + return score + + +def ctc_beam_search_decoder(probs_seq, + beam_size, + vocabulary, + ext_scoring_func=None, + blank_id=0): + ''' + Beam search decoder for CTC-trained network, using beam search with width + beam_size to find many paths to one label, return beam_size labels in + the order of probabilities. The implementation is based on Prefix Beam + Search(https://arxiv.org/abs/1408.2873), and the unclear part is + redesigned, need to be verified. + + :param probs_seq: 2-D list with length max_time_steps, each element + is a list of normalized probabilities over vocabulary + and blank for one time step. + :type probs_seq: 2-D list + :param beam_size: Width for beam search. + :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param method: Decoding method name, with options: "best_path". - :type method: basestring - :return: Decoding result string. - :rtype: baseline - """ + :param ext_scoring_func: External defined scoring function for + partially decoded sentence, e.g. word count + and language model. + :type external_scoring_function: function + :param blank_id: id of blank, default 0. + :type blank_id: int + :return: Decoding log probability and result string. 
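The external `Scorer` combines an n-gram language model probability with a word-count bonus, roughly `score = P_lm(sentence)^alpha * num_words(sentence)^beta`. A hypothetical usage sketch (the weights and the `.klm` path are examples only, and `kenlm` must be installed):

```
from decoder import Scorer

# Illustrative alpha/beta weights; in practice they need tuning.
scorer = Scorer(alpha=0.26, beta=0.1, model_path='./data/1Billion.klm')
# evaluate() multiplies the LM probability (raised to alpha) by the
# word count (raised to beta) for a partially decoded sentence.
score = scorer.evaluate('the quick brown fox')
```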
+ :rtype: list + + ''' + for prob_list in probs_seq: if not len(prob_list) == len(vocabulary) + 1: raise ValueError("probs dimension mismatchedd with vocabulary") - if method == "best_path": - return ctc_best_path_decode(probs_seq, vocabulary) - elif method == "beam_search": - return ctc_beam_search_decoder( - input_probs_matrix=probs_seq, - vocabulary=vocabulary, - beam_size=beam_size, - blank_id=len(vocabulary), - num_results_per_sample=num_results_per_sample) - else: - raise ValueError("Decoding method [%s] is not supported." % method) + + max_time_steps = len(probs_seq) + if not max_time_steps > 0: + raise ValueError("probs_seq shouldn't be empty") + + probs_dim = len(probs_seq[0]) + if not blank_id < probs_dim: + raise ValueError("blank_id shouldn't be greater than probs dimension") + + if ' ' not in vocabulary: + raise ValueError("space doesn't exist in vocabulary") + space_id = vocabulary.index(' ') + + # function to convert ids in string to list + def ids_str2list(ids_str): + ids_str = ids_str.split(' ') + ids_list = [int(elem) for elem in ids_str] + return ids_list + + # function to convert ids list to sentence + def ids2sentence(ids_list, vocab): + return ''.join([vocab[ids] for ids in ids_list]) + + ## initialize + # the set containing selected prefixes + prefix_set_prev = {'-1': 1.0} + probs_b, probs_nb = {'-1': 1.0}, {'-1': 0.0} + + ## extend prefix in loop + for time_step in range(max_time_steps): + # the set containing candidate prefixes + prefix_set_next = {} + probs_b_cur, probs_nb_cur = {}, {} + for l in prefix_set_prev: + prob = probs_seq[time_step] + + # convert ids in string to list + ids_list = ids_str2list(l) + end_id = ids_list[-1] + if not prefix_set_next.has_key(l): + probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0 + + # extend prefix by travering vocabulary + for c in range(0, probs_dim): + if c == blank_id: + probs_b_cur[l] += prob[c] * (probs_b[l] + probs_nb[l]) + else: + l_plus = l + ' ' + str(c) + if not prefix_set_next.has_key(l_plus): + probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 + + if c == end_id: + probs_nb_cur[l_plus] += prob[c] * probs_b[l] + probs_nb_cur[l] += prob[c] * probs_nb[l] + elif c == space_id: + if ext_scoring_func is None: + score = 1.0 + else: + prefix_sent = ids2sentence(ids_list, vocabulary) + score = ext_scoring_func(prefix_sent) + probs_nb_cur[l_plus] += score * prob[c] * ( + probs_b[l] + probs_nb[l]) + else: + probs_nb_cur[l_plus] += prob[c] * ( + probs_b[l] + probs_nb[l]) + # add l_plus into prefix_set_next + prefix_set_next[l_plus] = probs_nb_cur[ + l_plus] + probs_b_cur[l_plus] + # add l into prefix_set_next + prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l] + # update probs + probs_b, probs_nb = copy.deepcopy(probs_b_cur), copy.deepcopy( + probs_nb_cur) + + ## store top beam_size prefixes + prefix_set_prev = sorted( + prefix_set_next.iteritems(), key=lambda asd: asd[1], reverse=True) + if beam_size < len(prefix_set_prev): + prefix_set_prev = prefix_set_prev[:beam_size] + prefix_set_prev = dict(prefix_set_prev) + + beam_result = [] + for (seq, prob) in prefix_set_prev.items(): + if prob > 0.0: + ids_list = ids_str2list(seq)[1:] + result = ids2sentence(ids_list, vocabulary) + log_prob = np.log(prob) + beam_result.append([log_prob, result]) + + ## output top beam_size decoding results + beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) + return beam_result diff --git a/infer.py b/infer.py index e5ecf6f35..dc46b83e9 100644 --- a/infer.py +++ b/infer.py @@ -8,7 +8,7 @@ import argparse import gzip from 
audio_data_utils import DataGenerator from model import deep_speech2 -from decoder import ctc_decode +from decoder import * parser = argparse.ArgumentParser( description='Simplified version of DeepSpeech2 inference.') @@ -59,7 +59,7 @@ parser.add_argument( help="Vocabulary filepath. (default: %(default)s)") parser.add_argument( "--decode_method", - default='best_path', + default='beam_search', type=str, help="Method for ctc decoding, best_path or beam_search. (default: %(default)s)" ) @@ -69,11 +69,25 @@ parser.add_argument( type=int, help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( - "--num_result_per_sample", - default=2, + "--num_results_per_sample", + default=1, type=int, - help="Number of results per given sample in beam search. (default: %(default)d)" -) + help="Number of output per sample in beam search. (default: %(default)d)") +parser.add_argument( + "--language_model_path", + default="./data/1Billion.klm", + type=str, + help="Path for language model. (default: %(default)d)") +parser.add_argument( + "--alpha", + default=0.0, + type=float, + help="Parameter associated with language model. (default: %(default)f)") +parser.add_argument( + "--beta", + default=0.0, + type=float, + help="Parameter associated with word count. (default: %(default)f)") args = parser.parse_args() @@ -135,24 +149,34 @@ def infer(): for i in xrange(0, len(infer_data)) ] - # decode and print - for i, probs in enumerate(probs_split): - best_path_transcription = ctc_decode( - probs_seq=probs, vocabulary=vocab_list, method="best_path") - target_transcription = ''.join( - [vocab_list[index] for index in infer_data[i][1]]) - print("\nTarget Transcription: %s \nBst_path Transcription: %s" % - (target_transcription, best_path_transcription)) - beam_search_transcription = ctc_decode( - probs_seq=probs, - vocabulary=vocab_list, - method="beam_search", - beam_size=args.beam_size, - num_results_per_sample=args.num_result_per_sample) - for index in range(len(beam_search_transcription)): - print("LM No, %d - %4f: %s " % - (index, beam_search_transcription[index][0], - beam_search_transcription[index][1])) + ## decode and print + # best path decode + if args.decode_method == "best_path": + for i, probs in enumerate(probs_split): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + best_path_transcription = ctc_best_path_decode( + probs_seq=probs, vocabulary=vocab_list) + print("\nTarget Transcription: %s\nOutput Transcription: %s" % + (target_transcription, best_path_transcription)) + # beam search decode + elif args.decode_method == "beam_search": + for i, probs in enumerate(probs_split): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + beam_search_result = ctc_beam_search_decoder( + probs_seq=probs, + vocabulary=vocab_list, + beam_size=args.beam_size, + ext_scoring_func=ext_scorer.evaluate, + blank_id=len(vocab_list)) + print("\nTarget Transcription:\t%s" % target_transcription) + for index in range(args.num_results_per_sample): + result = beam_search_result[index] + print("Beam %d: %f \t%s" % (index, result[0], result[1])) + else: + raise ValueError("Decoding method [%s] is not supported." 
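Wiring the scorer into the beam search decoder directly looks roughly like this (a sketch with toy probabilities; it assumes `kenlm` is installed and a binary language model exists at the default path used above):

```
from decoder import Scorer, ctc_beam_search_decoder

vocab_list = [' ', 'a', 'b']
# 3 time steps x (len(vocab_list) + 1) probabilities, blank last.
probs_seq = [[0.1, 0.2, 0.3, 0.4],
             [0.4, 0.3, 0.2, 0.1],
             [0.1, 0.4, 0.3, 0.2]]
ext_scorer = Scorer(alpha=0.26, beta=0.1,
                    model_path='./data/1Billion.klm')
beam_search_result = ctc_beam_search_decoder(
    probs_seq=probs_seq,
    vocabulary=vocab_list,
    beam_size=20,
    ext_scoring_func=ext_scorer.evaluate,
    blank_id=len(vocab_list))
print(beam_search_result[0])  # [log probability, decoded string], best first
```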
% method) def main(): diff --git a/test_ctc_beam_search_decoder.py b/test_ctc_beam_search_decoder.py deleted file mode 100644 index f79704441..000000000 --- a/test_ctc_beam_search_decoder.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -import numpy as np -import tensorflow as tf -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -import ctc_beam_search_decoder as tested_decoder - - -def test_beam_search_decoder(): - max_time_steps = 6 - beam_size = 20 - num_results_per_sample = 20 - - input_prob_matrix_0 = np.asarray( - [ - [0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908], - [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517], - [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763], - [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655], - [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878], - # Random entry added in at time=5 - [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671] - ], - dtype=np.float32) - - # Add arbitrary offset - this is fine - input_log_prob_matrix_0 = np.log(input_prob_matrix_0) #+ 2.0 - - # len max_time_steps array of batch_size x depth matrices - inputs = ([ - input_log_prob_matrix_0[t, :][np.newaxis, :] - for t in range(max_time_steps) - ]) - - inputs_t = [ops.convert_to_tensor(x) for x in inputs] - inputs_t = array_ops.stack(inputs_t) - - # run CTC beam search decoder in tensorflow - with tf.Session() as sess: - decoded, log_probabilities = tf.nn.ctc_beam_search_decoder( - inputs_t, [max_time_steps], - beam_width=beam_size, - top_paths=num_results_per_sample, - merge_repeated=False) - tf_decoded = sess.run(decoded) - tf_log_probs = sess.run(log_probabilities) - - # run tested CTC beam search decoder - beam_result = tested_decoder.ctc_beam_search_decoder( - input_probs_matrix=input_prob_matrix_0, - beam_size=beam_size, - blank_id=5, # default blank_id in tensorflow decoder is (num classes-1) - space_id=4, # doesn't matter - max_time_steps=max_time_steps, - num_results_per_sample=num_results_per_sample) - - # compare decoding result - print( - "{tf_decoder log probs} \t {tested_decoder log probs}: {tf_decoder result} {tested_decoder result}" - ) - for index in range(len(beam_result)): - print(('%6f\t%6f: ') % (tf_log_probs[0][index], beam_result[index][0]), - tf_decoded[index].values, ' ', beam_result[index][1]) - - -if __name__ == '__main__': - test_beam_search_decoder() From ac370eca850825cc3cd075f47903722e2805fc5a Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 7 Jun 2017 09:06:58 +0800 Subject: [PATCH 021/335] add annotations --- decoder.py | 11 +++++------ infer.py | 5 +++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/decoder.py b/decoder.py index 91dbfc347..e16d10544 100755 --- a/decoder.py +++ b/decoder.py @@ -68,9 +68,9 @@ class Scorer(object): # execute evaluation def evaluate(self, sentence, bos=True, eos=False): lm = self.language_model_score(sentence, bos, eos) - word_count = self.word_count(sentence) + word_cnt = self.word_count(sentence) score = np.power(lm, self._alpha) \ - * np.power(word_count, self._beta) + * np.power(word_cnt, self._beta) return score @@ -104,19 +104,18 @@ def ctc_beam_search_decoder(probs_seq, :rtype: list ''' - + # dimension check for prob_list in probs_seq: if not len(prob_list) == len(vocabulary) + 1: raise ValueError("probs dimension mismatchedd with vocabulary") - max_time_steps = len(probs_seq) - if not max_time_steps > 0: - raise 
ValueError("probs_seq shouldn't be empty") + # blank_id check probs_dim = len(probs_seq[0]) if not blank_id < probs_dim: raise ValueError("blank_id shouldn't be greater than probs dimension") + # assign space_id if ' ' not in vocabulary: raise ValueError("space doesn't exist in vocabulary") space_id = vocabulary.index(' ') diff --git a/infer.py b/infer.py index dc46b83e9..be7ecad9f 100644 --- a/infer.py +++ b/infer.py @@ -77,7 +77,7 @@ parser.add_argument( "--language_model_path", default="./data/1Billion.klm", type=str, - help="Path for language model. (default: %(default)d)") + help="Path for language model. (default: %(default)s)") parser.add_argument( "--alpha", default=0.0, @@ -93,7 +93,7 @@ args = parser.parse_args() def infer(): """ - Max-ctc-decoding for DeepSpeech2. + Inference for DeepSpeech2. """ # initialize data generator data_generator = DataGenerator( @@ -174,6 +174,7 @@ def infer(): print("\nTarget Transcription:\t%s" % target_transcription) for index in range(args.num_results_per_sample): result = beam_search_result[index] + #output: index, log prob, beam result print("Beam %d: %f \t%s" % (index, result[0], result[1])) else: raise ValueError("Decoding method [%s] is not supported." % method) From 21ff590e6d905c9b8d0bba5159d996b8ba23e599 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 7 Jun 2017 14:57:04 +0800 Subject: [PATCH 022/335] modify language model scoring --- decoder.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/decoder.py b/decoder.py index e16d10544..458cd9ad3 100755 --- a/decoder.py +++ b/decoder.py @@ -52,13 +52,19 @@ class Scorer(object): """ def __init__(self, alpha, beta, model_path): - self._alpha = alpha self._beta = beta self._language_model = kenlm.LanguageModel(model_path) def language_model_score(self, sentence, bos=True, eos=False): - log_prob = self._language_model.score(sentence, bos, eos) + words = sentence.strip().split(' ') + length = len(words) + if length == 1: + log_prob = self._language_model.score(sentence, bos, eos) + else: + prefix_sent = ' '.join(words[0:length - 1]) + log_prob = self._language_model.score(sentence, bos, eos) \ + - self._language_model.score(prefix_sent, bos, eos) return np.power(10, log_prob) def word_count(self, sentence): From 44efbed798966f1d57276e5fde3d8541e8fddc48 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 7 Jun 2017 16:59:11 +0800 Subject: [PATCH 023/335] rename variables in decoder --- decoder.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/decoder.py b/decoder.py index 458cd9ad3..d5bd72f6f 100755 --- a/decoder.py +++ b/decoder.py @@ -92,7 +92,7 @@ def ctc_beam_search_decoder(probs_seq, Search(https://arxiv.org/abs/1408.2873), and the unclear part is redesigned, need to be verified. - :param probs_seq: 2-D list with length max_time_steps, each element + :param probs_seq: 2-D list with length num_time_steps, each element is a list of normalized probabilities over vocabulary and blank for one time step. 
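The scorer now conditions the last word on its prefix; in KenLM terms the change amounts to a difference of log10 scores, roughly as follows (assuming `kenlm` is installed and a binary model is available at an example path):

```
import kenlm

lm = kenlm.LanguageModel('./data/1Billion.klm')  # example path from infer.py

sentence = 'the quick brown fox'
prefix = 'the quick brown'
# log10 P(fox | the quick brown) = log10 P(sentence) - log10 P(prefix)
log10_cond = (lm.score(sentence, bos=True, eos=False)
              - lm.score(prefix, bos=True, eos=False))
cond_prob = 10 ** log10_cond
```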
:type probs_seq: 2-D list @@ -114,7 +114,7 @@ def ctc_beam_search_decoder(probs_seq, for prob_list in probs_seq: if not len(prob_list) == len(vocabulary) + 1: raise ValueError("probs dimension mismatchedd with vocabulary") - max_time_steps = len(probs_seq) + num_time_steps = len(probs_seq) # blank_id check probs_dim = len(probs_seq[0]) @@ -139,10 +139,10 @@ def ctc_beam_search_decoder(probs_seq, ## initialize # the set containing selected prefixes prefix_set_prev = {'-1': 1.0} - probs_b, probs_nb = {'-1': 1.0}, {'-1': 0.0} + probs_b_prev, probs_nb_prev = {'-1': 1.0}, {'-1': 0.0} ## extend prefix in loop - for time_step in range(max_time_steps): + for time_step in range(num_time_steps): # the set containing candidate prefixes prefix_set_next = {} probs_b_cur, probs_nb_cur = {}, {} @@ -158,33 +158,34 @@ def ctc_beam_search_decoder(probs_seq, # extend prefix by travering vocabulary for c in range(0, probs_dim): if c == blank_id: - probs_b_cur[l] += prob[c] * (probs_b[l] + probs_nb[l]) + probs_b_cur[l] += prob[c] * ( + probs_b_prev[l] + probs_nb_prev[l]) else: l_plus = l + ' ' + str(c) if not prefix_set_next.has_key(l_plus): probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 if c == end_id: - probs_nb_cur[l_plus] += prob[c] * probs_b[l] - probs_nb_cur[l] += prob[c] * probs_nb[l] + probs_nb_cur[l_plus] += prob[c] * probs_b_prev[l] + probs_nb_cur[l] += prob[c] * probs_nb_prev[l] elif c == space_id: if ext_scoring_func is None: score = 1.0 else: - prefix_sent = ids2sentence(ids_list, vocabulary) - score = ext_scoring_func(prefix_sent) + prefix = ids2sentence(ids_list, vocabulary) + score = ext_scoring_func(prefix) probs_nb_cur[l_plus] += score * prob[c] * ( - probs_b[l] + probs_nb[l]) + probs_b_prev[l] + probs_nb_prev[l]) else: probs_nb_cur[l_plus] += prob[c] * ( - probs_b[l] + probs_nb[l]) + probs_b_prev[l] + probs_nb_prev[l]) # add l_plus into prefix_set_next prefix_set_next[l_plus] = probs_nb_cur[ l_plus] + probs_b_cur[l_plus] # add l into prefix_set_next prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l] # update probs - probs_b, probs_nb = copy.deepcopy(probs_b_cur), copy.deepcopy( + probs_b_prev, probs_nb_prev = copy.deepcopy(probs_b_cur), copy.deepcopy( probs_nb_cur) ## store top beam_size prefixes From b046e651e7d41a8332fc49096383d5777f2dc2c2 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 7 Jun 2017 17:43:12 +0800 Subject: [PATCH 024/335] tiny modify to pass CI --- decoder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/decoder.py b/decoder.py index d5bd72f6f..b7ed40455 100755 --- a/decoder.py +++ b/decoder.py @@ -56,6 +56,7 @@ class Scorer(object): self._beta = beta self._language_model = kenlm.LanguageModel(model_path) + # language model scoring def language_model_score(self, sentence, bos=True, eos=False): words = sentence.strip().split(' ') length = len(words) @@ -67,6 +68,7 @@ class Scorer(object): - self._language_model.score(prefix_sent, bos, eos) return np.power(10, log_prob) + # word insertion term def word_count(self, sentence): words = sentence.strip().split(' ') return len(words) From 9fda521ee3e067291560c7f4816d0540d808fb22 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 7 Jun 2017 19:24:04 +0800 Subject: [PATCH 025/335] improve external scorer --- decoder.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/decoder.py b/decoder.py index b7ed40455..05400d1b4 100755 --- a/decoder.py +++ b/decoder.py @@ -6,6 +6,7 @@ from itertools import groupby import numpy as np import copy import kenlm +import os def 
ctc_best_path_decode(probs_seq, vocabulary): @@ -54,19 +55,16 @@ class Scorer(object): def __init__(self, alpha, beta, model_path): self._alpha = alpha self._beta = beta + if not os.path.isfile(model_path): + raise IOError("Invaid language model path: %s" % model_path) self._language_model = kenlm.LanguageModel(model_path) - # language model scoring - def language_model_score(self, sentence, bos=True, eos=False): - words = sentence.strip().split(' ') - length = len(words) - if length == 1: - log_prob = self._language_model.score(sentence, bos, eos) - else: - prefix_sent = ' '.join(words[0:length - 1]) - log_prob = self._language_model.score(sentence, bos, eos) \ - - self._language_model.score(prefix_sent, bos, eos) - return np.power(10, log_prob) + # n-gram language model scoring + def language_model_score(self, sentence): + #log prob of last word + log_cond_prob = list( + self._language_model.full_scores(sentence, eos=False))[-1][0] + return np.power(10, log_cond_prob) # word insertion term def word_count(self, sentence): @@ -74,8 +72,8 @@ class Scorer(object): return len(words) # execute evaluation - def evaluate(self, sentence, bos=True, eos=False): - lm = self.language_model_score(sentence, bos, eos) + def evaluate(self, sentence): + lm = self.language_model_score(sentence) word_cnt = self.word_count(sentence) score = np.power(lm, self._alpha) \ * np.power(word_cnt, self._beta) From 453f038df91fc56ea24ff09e85def14194f32ee7 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 8 Jun 2017 16:05:40 +0800 Subject: [PATCH 026/335] optimize the efficiency of beam search --- decoder.py | 41 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/decoder.py b/decoder.py index 05400d1b4..0eab36519 100755 --- a/decoder.py +++ b/decoder.py @@ -121,25 +121,10 @@ def ctc_beam_search_decoder(probs_seq, if not blank_id < probs_dim: raise ValueError("blank_id shouldn't be greater than probs dimension") - # assign space_id - if ' ' not in vocabulary: - raise ValueError("space doesn't exist in vocabulary") - space_id = vocabulary.index(' ') - - # function to convert ids in string to list - def ids_str2list(ids_str): - ids_str = ids_str.split(' ') - ids_list = [int(elem) for elem in ids_str] - return ids_list - - # function to convert ids list to sentence - def ids2sentence(ids_list, vocab): - return ''.join([vocab[ids] for ids in ids_list]) - ## initialize # the set containing selected prefixes - prefix_set_prev = {'-1': 1.0} - probs_b_prev, probs_nb_prev = {'-1': 1.0}, {'-1': 0.0} + prefix_set_prev = {'\t': 1.0} + probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0} ## extend prefix in loop for time_step in range(num_time_steps): @@ -148,10 +133,6 @@ def ctc_beam_search_decoder(probs_seq, probs_b_cur, probs_nb_cur = {}, {} for l in prefix_set_prev: prob = probs_seq[time_step] - - # convert ids in string to list - ids_list = ids_str2list(l) - end_id = ids_list[-1] if not prefix_set_next.has_key(l): probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0 @@ -161,18 +142,20 @@ def ctc_beam_search_decoder(probs_seq, probs_b_cur[l] += prob[c] * ( probs_b_prev[l] + probs_nb_prev[l]) else: - l_plus = l + ' ' + str(c) + last_char = l[-1] + new_char = vocabulary[c] + l_plus = l + new_char if not prefix_set_next.has_key(l_plus): probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 - if c == end_id: + if new_char == last_char: probs_nb_cur[l_plus] += prob[c] * probs_b_prev[l] probs_nb_cur[l] += prob[c] * probs_nb_prev[l] - elif c == space_id: - if ext_scoring_func is None: + 
elif new_char == ' ': + if (ext_scoring_func is None) or (len(l) == 1): score = 1.0 else: - prefix = ids2sentence(ids_list, vocabulary) + prefix = l[1:] score = ext_scoring_func(prefix) probs_nb_cur[l_plus] += score * prob[c] * ( probs_b_prev[l] + probs_nb_prev[l]) @@ -185,8 +168,7 @@ def ctc_beam_search_decoder(probs_seq, # add l into prefix_set_next prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l] # update probs - probs_b_prev, probs_nb_prev = copy.deepcopy(probs_b_cur), copy.deepcopy( - probs_nb_cur) + probs_b_prev, probs_nb_prev = probs_b_cur, probs_nb_cur ## store top beam_size prefixes prefix_set_prev = sorted( @@ -198,8 +180,7 @@ def ctc_beam_search_decoder(probs_seq, beam_result = [] for (seq, prob) in prefix_set_prev.items(): if prob > 0.0: - ids_list = ids_str2list(seq)[1:] - result = ids2sentence(ids_list, vocabulary) + result = seq[1:] log_prob = np.log(prob) beam_result.append([log_prob, result]) From ae83a25affafda71f004538b72309c5043f6667b Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 12 Jun 2017 17:13:48 +0800 Subject: [PATCH 027/335] add beam search decoder using multiprocesses --- decoder.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++- infer.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/decoder.py b/decoder.py index 0eab36519..fc746c705 100755 --- a/decoder.py +++ b/decoder.py @@ -2,11 +2,12 @@ CTC-like decoder utilitis. """ +import os from itertools import groupby import numpy as np import copy import kenlm -import os +import multiprocessing def ctc_best_path_decode(probs_seq, vocabulary): @@ -187,3 +188,54 @@ def ctc_beam_search_decoder(probs_seq, ## output top beam_size decoding results beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) return beam_result + + +def ctc_beam_search_decoder_nproc(probs_split, + beam_size, + vocabulary, + ext_scoring_func=None, + blank_id=0, + num_processes=None): + ''' + Beam search decoder using multiple processes. + + :param probs_seq: 3-D list with length num_time_steps, each element + is a 2-D list of probabilities can be used by + ctc_beam_search_decoder. + + :type probs_seq: 3-D list + :param beam_size: Width for beam search. + :type beam_size: int + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param ext_scoring_func: External defined scoring function for + partially decoded sentence, e.g. word count + and language model. + :type external_scoring_function: function + :param blank_id: id of blank, default 0. + :type blank_id: int + :param num_processes: Number of processes, default None, equal to the + number of CPUs. + :type num_processes: int + :return: Decoding log probability and result string. 
+ :rtype: list + + ''' + + if num_processes is None: + num_processes = multiprocessing.cpu_count() + if not num_processes > 0: + raise ValueError("Number of processes must be positive!") + + pool = multiprocessing.Pool(processes=num_processes) + results = [] + for i, probs_list in enumerate(probs_split): + args = (probs_list, beam_size, vocabulary, ext_scoring_func, blank_id) + results.append(pool.apply_async(ctc_beam_search_decoder, args)) + + pool.close() + pool.join() + beam_search_results = [] + for result in results: + beam_search_results.append(result.get()) + return beam_search_results diff --git a/infer.py b/infer.py index be7ecad9f..377aeb73c 100644 --- a/infer.py +++ b/infer.py @@ -9,6 +9,7 @@ import gzip from audio_data_utils import DataGenerator from model import deep_speech2 from decoder import * +from error_rate import wer parser = argparse.ArgumentParser( description='Simplified version of DeepSpeech2 inference.') @@ -59,9 +60,9 @@ parser.add_argument( help="Vocabulary filepath. (default: %(default)s)") parser.add_argument( "--decode_method", - default='beam_search', + default='beam_search_nproc', type=str, - help="Method for ctc decoding, best_path or beam_search. (default: %(default)s)" + help="Method for ctc decoding, best_path, beam_search or beam_search_nproc. (default: %(default)s)" ) parser.add_argument( "--beam_size", @@ -151,6 +152,7 @@ def infer(): ## decode and print # best path decode + wer_sum, wer_counter = 0, 0 if args.decode_method == "best_path": for i, probs in enumerate(probs_split): target_transcription = ''.join( @@ -159,12 +161,17 @@ def infer(): probs_seq=probs, vocabulary=vocab_list) print("\nTarget Transcription: %s\nOutput Transcription: %s" % (target_transcription, best_path_transcription)) + wer_cur = wer(target_transcription, best_path_transcription) + wer_sum += wer_cur + wer_counter += 1 + print("cur wer = %f, average wer = %f" % + (wer_cur, wer_sum / wer_counter)) # beam search decode elif args.decode_method == "beam_search": + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) for i, probs in enumerate(probs_split): target_transcription = ''.join( [vocab_list[index] for index in infer_data[i][1]]) - ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) beam_search_result = ctc_beam_search_decoder( probs_seq=probs, vocabulary=vocab_list, @@ -172,10 +179,40 @@ def infer(): ext_scoring_func=ext_scorer.evaluate, blank_id=len(vocab_list)) print("\nTarget Transcription:\t%s" % target_transcription) + + for index in range(args.num_results_per_sample): + result = beam_search_result[index] + #output: index, log prob, beam result + print("Beam %d: %f \t%s" % (index, result[0], result[1])) + wer_cur = wer(target_transcription, beam_search_result[0][1]) + wer_sum += wer_cur + wer_counter += 1 + print("cur wer = %f , average wer = %f" % + (wer_cur, wer_sum / wer_counter)) + # beam search in multiple processes + elif args.decode_method == "beam_search_nproc": + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + beam_search_nproc_results = ctc_beam_search_decoder_nproc( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=args.beam_size, + #ext_scoring_func=ext_scorer.evaluate, + ext_scoring_func=None, + blank_id=len(vocab_list)) + for i, beam_search_result in enumerate(beam_search_nproc_results): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + print("\nTarget Transcription:\t%s" % target_transcription) + for index in range(args.num_results_per_sample): 
result = beam_search_result[index] #output: index, log prob, beam result print("Beam %d: %f \t%s" % (index, result[0], result[1])) + wer_cur = wer(target_transcription, beam_search_result[0][1]) + wer_sum += wer_cur + wer_counter += 1 + print("cur wer = %f , average wer = %f" % + (wer_cur, wer_sum / wer_counter)) else: raise ValueError("Decoding method [%s] is not supported." % method) From bb34e90398b71fca0c1e9ff88ab21e069db001ba Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 12 Jun 2017 17:20:22 +0800 Subject: [PATCH 028/335] correct typos in annotations --- decoder.py | 2 +- infer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/decoder.py b/decoder.py index fc746c705..96e911811 100755 --- a/decoder.py +++ b/decoder.py @@ -199,7 +199,7 @@ def ctc_beam_search_decoder_nproc(probs_split, ''' Beam search decoder using multiple processes. - :param probs_seq: 3-D list with length num_time_steps, each element + :param probs_seq: 3-D list with length batch_size, each element is a 2-D list of probabilities can be used by ctc_beam_search_decoder. diff --git a/infer.py b/infer.py index 377aeb73c..0be89e617 100644 --- a/infer.py +++ b/infer.py @@ -189,7 +189,7 @@ def infer(): wer_counter += 1 print("cur wer = %f , average wer = %f" % (wer_cur, wer_sum / wer_counter)) - # beam search in multiple processes + # beam search using multiple processes elif args.decode_method == "beam_search_nproc": ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) beam_search_nproc_results = ctc_beam_search_decoder_nproc( From c25c62b8f9544e488bdc696c6e8021e09661eb42 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 12 Jun 2017 19:06:55 +0800 Subject: [PATCH 029/335] refine audio_data_utils.py --- audio_data_utils.py | 68 +++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/audio_data_utils.py b/audio_data_utils.py index abb7f1e99..692a42809 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -247,22 +247,25 @@ class DataGenerator(object): new_batch.append((padded_audio, text)) return new_batch - def __batch_shuffle__(self, manifest, batch_size): + def __batch_shuffle__(self, manifest, batch_shuffle_size): """ 1. Sort the audio clips by duration. - 2. Generate a random number `k`, k in [0, batch_size). + 2. Generate a random number `k`, k in [0, batch_shuffle_size). 3. Randomly remove `k` instances in order to make different mini-batches, - then make minibatches and each minibatch size is batch_size. + then make minibatches and each minibatch size is batch_shuffle_size. 4. Shuffle the minibatches. :param manifest: manifest file. :type manifest: list - :param batch_size: batch size. - :type batch_size: int + :param batch_shuffle_size: This size is uesed to generate a random number, + it usually equals to batch size. + :type batch_shuffle_size: int + :return: batch shuffled mainifest. 
+ :rtype: list """ manifest.sort(key=lambda x: x["duration"]) - shift_len = self.__random__.randint(0, batch_size - 1) - batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) + shift_len = self.__random__.randint(0, batch_shuffle_size - 1) + batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_shuffle_size) self.__random__.shuffle(batch_manifest) batch_manifest = list(sum(batch_manifest, ())) res_len = len(manifest) - shift_len - len(batch_manifest) @@ -270,11 +273,7 @@ class DataGenerator(object): batch_manifest.extend(manifest[0:shift_len]) return batch_manifest - def instance_reader_creator(self, - manifest_path, - batch_size, - sortagrad=True, - shuffle=False): + def instance_reader_creator(self, manifest): """ Instance reader creator for audio data. Creat a callable function to produce instances of data. @@ -282,35 +281,19 @@ class DataGenerator(object): Instance: a tuple of a numpy ndarray of audio spectrogram and a list of tokenized and indexed transcription text. - :param manifest_path: Filepath of manifest for audio clip files. - :type manifest_path: basestring - :param sortagrad: Sort the audio clips by duration in the first epoc - if set True. - :type sortagrad: bool - :param shuffle: Shuffle the audio clips if set True. - :type shuffle: bool + :param manifest: Filepath of manifest for audio clip files. + :type manifest: basestring :return: Data reader function. :rtype: callable """ def reader(): - # read manifest - manifest = self.__read_manifest__( - manifest_path=manifest_path, - max_duration=self.__max_duration__, - min_duration=self.__min_duration__) - # sort (by duration) or shuffle manifest - if self.__epoc__ == 0 and sortagrad: - manifest.sort(key=lambda x: x["duration"]) - elif shuffle: - manifest = self.__batch_shuffle__(manifest, batch_size) # extract spectrogram feature for instance in manifest: spectrogram = self.__audio_featurize__( instance["audio_filepath"]) transcript = self.__text_featurize__(instance["text"]) yield (spectrogram, transcript) - self.__epoc__ += 1 return reader @@ -320,7 +303,7 @@ class DataGenerator(object): padding_to=-1, flatten=False, sortagrad=False, - shuffle=False): + batch_shuffle=False): """ Batch data reader creator for audio data. Creat a callable function to produce batches of data. @@ -343,18 +326,28 @@ class DataGenerator(object): :param sortagrad: Sort the audio clips by duration in the first epoc if set True. :type sortagrad: bool - :param shuffle: Shuffle the audio clips if set True. - :type shuffle: bool + :param batch_shuffle: Shuffle the audio clips if set True. It is + not a thorough instance-wise shuffle, + but a specific batch-wise shuffle. + :type batch_shuffle: bool :return: Batch reader function, producing batches of data when called. 
:rtype: callable """ def batch_reader(): - instance_reader = self.instance_reader_creator( + # read manifest + manifest = self.__read_manifest__( manifest_path=manifest_path, - batch_size=batch_size, - sortagrad=sortagrad, - shuffle=shuffle) + max_duration=self.__max_duration__, + min_duration=self.__min_duration__) + + # sort (by duration) or shuffle manifest + if self.__epoc__ == 0 and sortagrad: + manifest.sort(key=lambda x: x["duration"]) + elif batch_shuffle: + manifest = self.__batch_shuffle__(manifest, batch_size) + + instance_reader = self.instance_reader_creator(manifest) batch = [] for instance in instance_reader(): batch.append(instance) @@ -363,6 +356,7 @@ class DataGenerator(object): batch = [] if len(batch) > 0: yield self.__padding_batch__(batch, padding_to, flatten) + self.__epoc__ += 1 return batch_reader From 9c27b1d14e601ff64df6e5dacc95d77933e2b39a Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 12 Jun 2017 19:53:41 +0800 Subject: [PATCH 030/335] add more comments and update train.py --- audio_data_utils.py | 30 ++++++++++++++++++++---------- train.py | 6 +++--- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/audio_data_utils.py b/audio_data_utils.py index 692a42809..1cd29be11 100644 --- a/audio_data_utils.py +++ b/audio_data_utils.py @@ -247,25 +247,34 @@ class DataGenerator(object): new_batch.append((padded_audio, text)) return new_batch - def __batch_shuffle__(self, manifest, batch_shuffle_size): + def __batch_shuffle__(self, manifest, batch_size): """ + The instances have different lengths and they cannot be + combined into a single matrix multiplication. It usually + sorts the training examples by length and combines only + similarly-sized instances into minibatches, pads with + silence when necessary so that all instances in a batch + have the same length. This batch shuffle fuction is used + to make similarly-sized instances into minibatches and + make a batch-wise shuffle. + 1. Sort the audio clips by duration. - 2. Generate a random number `k`, k in [0, batch_shuffle_size). + 2. Generate a random number `k`, k in [0, batch_size). 3. Randomly remove `k` instances in order to make different mini-batches, - then make minibatches and each minibatch size is batch_shuffle_size. + then make minibatches and each minibatch size is batch_size. 4. Shuffle the minibatches. :param manifest: manifest file. :type manifest: list - :param batch_shuffle_size: This size is uesed to generate a random number, - it usually equals to batch size. - :type batch_shuffle_size: int + :param batch_size: Batch size. This size is also used for generate + a random number for batch shuffle. + :type batch_size: int :return: batch shuffled mainifest. :rtype: list """ manifest.sort(key=lambda x: x["duration"]) - shift_len = self.__random__.randint(0, batch_shuffle_size - 1) - batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_shuffle_size) + shift_len = self.__random__.randint(0, batch_size - 1) + batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) self.__random__.shuffle(batch_manifest) batch_manifest = list(sum(batch_manifest, ())) res_len = len(manifest) - shift_len - len(batch_manifest) @@ -327,8 +336,9 @@ class DataGenerator(object): if set True. :type sortagrad: bool :param batch_shuffle: Shuffle the audio clips if set True. It is - not a thorough instance-wise shuffle, - but a specific batch-wise shuffle. + not a thorough instance-wise shuffle, but a + specific batch-wise shuffle. For more details, + please see `__batch_shuffle__` function. 
:type batch_shuffle: bool :return: Batch reader function, producing batches of data when called. :rtype: callable diff --git a/train.py b/train.py index eb9b56de7..957c24267 100644 --- a/train.py +++ b/train.py @@ -143,12 +143,12 @@ def train(): train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest_path, batch_size=args.batch_size, - sortagrad=True, - shuffle=True) + sortagrad=True if args.init_model_path is None else False, + batch_shuffle=True) test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, - shuffle=False) + batch_shuffle=False) feeding = train_generator.data_name_feeding() # create event handler From cd3617aeb4df0dbe998060ba410c782856b2abf3 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 12 Jun 2017 23:19:40 +0800 Subject: [PATCH 031/335] Refactor whole data preprocessor for DS2 (re-design classes, re-organize dir, add augmentaion interfaces etc.). 1. Refactor data preprocessor with new added class AudioSegment, SpeechSegment, TextFeaturizer, AudioFeaturizer, SpeechFeaturizer. 2. Add data augmentation interfaces and class AugmentorBase, AugmentationPipeline, VolumnPerturbAugmentor etc.. 3. Seperate normalizer's mean and std computing from training, by adding FeatureNormalizer and a seperate tool compute_mean_std.py. 4. Re-organize directory. --- audio_data_utils.py | 411 ------------------ compute_mean_std.py | 56 +++ data_utils/__init__.py | 0 data_utils/audio.py | 68 +++ data_utils/augmentor/__init__.py | 0 data_utils/augmentor/augmentation.py | 38 ++ data_utils/augmentor/base.py | 17 + data_utils/augmentor/volumn_perturb.py | 17 + data_utils/data.py | 247 +++++++++++ data_utils/featurizer/__init__.py | 0 data_utils/featurizer/audio_featurizer.py | 86 ++++ data_utils/featurizer/speech_featurizer.py | 32 ++ data_utils/featurizer/text_featurizer.py | 39 ++ data_utils/normalizer.py | 49 +++ data_utils/utils.py | 19 + {data => datasets/librispeech}/librispeech.py | 2 +- datasets/run_all.sh | 13 + {data => datasets/vocab}/eng_vocab.txt | 0 infer.py | 61 ++- train.py | 74 ++-- 20 files changed, 750 insertions(+), 479 deletions(-) delete mode 100644 audio_data_utils.py create mode 100755 compute_mean_std.py create mode 100755 data_utils/__init__.py create mode 100755 data_utils/audio.py create mode 100755 data_utils/augmentor/__init__.py create mode 100755 data_utils/augmentor/augmentation.py create mode 100755 data_utils/augmentor/base.py create mode 100755 data_utils/augmentor/volumn_perturb.py create mode 100644 data_utils/data.py create mode 100755 data_utils/featurizer/__init__.py create mode 100755 data_utils/featurizer/audio_featurizer.py create mode 100755 data_utils/featurizer/speech_featurizer.py create mode 100755 data_utils/featurizer/text_featurizer.py create mode 100755 data_utils/normalizer.py create mode 100755 data_utils/utils.py rename {data => datasets/librispeech}/librispeech.py (99%) create mode 100755 datasets/run_all.sh rename {data => datasets/vocab}/eng_vocab.txt (100%) diff --git a/audio_data_utils.py b/audio_data_utils.py deleted file mode 100644 index 1cd29be11..000000000 --- a/audio_data_utils.py +++ /dev/null @@ -1,411 +0,0 @@ -""" - Providing basic audio data preprocessing pipeline, and offering - both instance-level and batch-level data reader interfaces. 
-""" -import paddle.v2 as paddle -import logging -import json -import random -import soundfile -import numpy as np -import itertools -import os - -RANDOM_SEED = 0 -logger = logging.getLogger(__name__) - - -class DataGenerator(object): - """ - DataGenerator provides basic audio data preprocessing pipeline, and offers - both instance-level and batch-level data reader interfaces. - Normalized FFT are used as audio features here. - - :param vocab_filepath: Vocabulary file path for indexing tokenized - transcriptions. - :type vocab_filepath: basestring - :param normalizer_manifest_path: Manifest filepath for collecting feature - normalization statistics, e.g. mean, std. - :type normalizer_manifest_path: basestring - :param normalizer_num_samples: Number of instances sampled for collecting - feature normalization statistics. - Default is 100. - :type normalizer_num_samples: int - :param max_duration: Audio clips with duration (in seconds) greater than - this will be discarded. Default is 20.0. - :type max_duration: float - :param min_duration: Audio clips with duration (in seconds) smaller than - this will be discarded. Default is 0.0. - :type min_duration: float - :param stride_ms: Striding size (in milliseconds) for generating frames. - Default is 10.0. - :type stride_ms: float - :param window_ms: Window size (in milliseconds) for frames. Default is 20.0. - :type window_ms: float - :param max_frequency: Maximun frequency for FFT features. FFT features of - frequency larger than this will be discarded. - If set None, all features will be kept. - Default is None. - :type max_frequency: float - """ - - def __init__(self, - vocab_filepath, - normalizer_manifest_path, - normalizer_num_samples=100, - max_duration=20.0, - min_duration=0.0, - stride_ms=10.0, - window_ms=20.0, - max_frequency=None): - self.__max_duration__ = max_duration - self.__min_duration__ = min_duration - self.__stride_ms__ = stride_ms - self.__window_ms__ = window_ms - self.__max_frequency__ = max_frequency - self.__epoc__ = 0 - self.__random__ = random.Random(RANDOM_SEED) - # load vocabulary (dictionary) - self.__vocab_dict__, self.__vocab_list__ = \ - self.__load_vocabulary_from_file__(vocab_filepath) - # collect normalizer statistics - self.__mean__, self.__std__ = self.__collect_normalizer_statistics__( - manifest_path=normalizer_manifest_path, - num_samples=normalizer_num_samples) - - def __audio_featurize__(self, audio_filename): - """ - Preprocess audio data, including feature extraction, normalization etc.. - """ - features = self.__audio_basic_featurize__(audio_filename) - return self.__normalize__(features) - - def __text_featurize__(self, text): - """ - Preprocess text data, including tokenizing and token indexing etc.. - """ - return self.__convert_text_to_char_index__( - text=text, vocabulary=self.__vocab_dict__) - - def __audio_basic_featurize__(self, audio_filename): - """ - Compute basic (without normalization etc.) features for audio data. - """ - return self.__spectrogram_from_file__( - filename=audio_filename, - stride_ms=self.__stride_ms__, - window_ms=self.__window_ms__, - max_freq=self.__max_frequency__) - - def __collect_normalizer_statistics__(self, manifest_path, num_samples=100): - """ - Compute feature normalization statistics, i.e. mean and stddev. 
- """ - # read manifest - manifest = self.__read_manifest__( - manifest_path=manifest_path, - max_duration=self.__max_duration__, - min_duration=self.__min_duration__) - # sample for statistics - sampled_manifest = self.__random__.sample(manifest, num_samples) - # extract spectrogram feature - features = [] - for instance in sampled_manifest: - spectrogram = self.__audio_basic_featurize__( - instance["audio_filepath"]) - features.append(spectrogram) - features = np.hstack(features) - mean = np.mean(features, axis=1).reshape([-1, 1]) - std = np.std(features, axis=1).reshape([-1, 1]) - return mean, std - - def __normalize__(self, features, eps=1e-14): - """ - Normalize features to be of zero mean and unit stddev. - """ - return (features - self.__mean__) / (self.__std__ + eps) - - def __spectrogram_from_file__(self, - filename, - stride_ms=10.0, - window_ms=20.0, - max_freq=None, - eps=1e-14): - """ - Laod audio data and calculate the log of spectrogram by FFT. - Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech - """ - audio, sample_rate = soundfile.read(filename) - if audio.ndim >= 2: - audio = np.mean(audio, 1) - if max_freq is None: - max_freq = sample_rate / 2 - if max_freq > sample_rate / 2: - raise ValueError("max_freq must be greater than half of " - "sample rate.") - if stride_ms > window_ms: - raise ValueError("Stride size must not be greater than " - "window size.") - stride_size = int(0.001 * sample_rate * stride_ms) - window_size = int(0.001 * sample_rate * window_ms) - spectrogram, freqs = self.__extract_spectrogram__( - audio, - window_size=window_size, - stride_size=stride_size, - sample_rate=sample_rate) - ind = np.where(freqs <= max_freq)[0][-1] + 1 - return np.log(spectrogram[:ind, :] + eps) - - def __extract_spectrogram__(self, samples, window_size, stride_size, - sample_rate): - """ - Compute the spectrogram by FFT for a discrete real signal. - Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech - """ - # extract strided windows - truncate_size = (len(samples) - window_size) % stride_size - samples = samples[:len(samples) - truncate_size] - nshape = (window_size, (len(samples) - window_size) // stride_size + 1) - nstrides = (samples.strides[0], samples.strides[0] * stride_size) - windows = np.lib.stride_tricks.as_strided( - samples, shape=nshape, strides=nstrides) - assert np.all( - windows[:, 1] == samples[stride_size:(stride_size + window_size)]) - # window weighting, squared Fast Fourier Transform (fft), scaling - weighting = np.hanning(window_size)[:, None] - fft = np.fft.rfft(windows * weighting, axis=0) - fft = np.absolute(fft)**2 - scale = np.sum(weighting**2) * sample_rate - fft[1:-1, :] *= (2.0 / scale) - fft[(0, -1), :] /= scale - # prepare fft frequency list - freqs = float(sample_rate) / window_size * np.arange(fft.shape[0]) - return fft, freqs - - def __load_vocabulary_from_file__(self, vocabulary_path): - """ - Load vocabulary from file. - """ - if not os.path.exists(vocabulary_path): - raise ValueError("Vocabulary file %s not found.", vocabulary_path) - vocab_lines = [] - with open(vocabulary_path, 'r') as file: - vocab_lines.extend(file.readlines()) - vocab_list = [line[:-1] for line in vocab_lines] - vocab_dict = dict( - [(token, id) for (id, token) in enumerate(vocab_list)]) - return vocab_dict, vocab_list - - def __convert_text_to_char_index__(self, text, vocabulary): - """ - Convert text string to a list of character index integers. 
- """ - return [vocabulary[w] for w in text] - - def __read_manifest__(self, manifest_path, max_duration, min_duration): - """ - Load and parse manifest file. - """ - manifest = [] - for json_line in open(manifest_path): - try: - json_data = json.loads(json_line) - except Exception as e: - raise ValueError("Error reading manifest: %s" % str(e)) - if (json_data["duration"] <= max_duration and - json_data["duration"] >= min_duration): - manifest.append(json_data) - return manifest - - def __padding_batch__(self, batch, padding_to=-1, flatten=False): - """ - Padding audio part of features (only in the time axis -- column axis) - with zeros, to make each instance in the batch share the same - audio feature shape. - - If `padding_to` is set -1, the maximun column numbers in the batch will - be used as the target size. Otherwise, `padding_to` will be the target - size. Default is -1. - - If `flatten` is set True, audio data will be flatten to be a 1-dim - ndarray. Default is False. - """ - new_batch = [] - # get target shape - max_length = max([audio.shape[1] for audio, text in batch]) - if padding_to != -1: - if padding_to < max_length: - raise ValueError("If padding_to is not -1, it should be greater" - " or equal to the original instance length.") - max_length = padding_to - # padding - for audio, text in batch: - padded_audio = np.zeros([audio.shape[0], max_length]) - padded_audio[:, :audio.shape[1]] = audio - if flatten: - padded_audio = padded_audio.flatten() - new_batch.append((padded_audio, text)) - return new_batch - - def __batch_shuffle__(self, manifest, batch_size): - """ - The instances have different lengths and they cannot be - combined into a single matrix multiplication. It usually - sorts the training examples by length and combines only - similarly-sized instances into minibatches, pads with - silence when necessary so that all instances in a batch - have the same length. This batch shuffle fuction is used - to make similarly-sized instances into minibatches and - make a batch-wise shuffle. - - 1. Sort the audio clips by duration. - 2. Generate a random number `k`, k in [0, batch_size). - 3. Randomly remove `k` instances in order to make different mini-batches, - then make minibatches and each minibatch size is batch_size. - 4. Shuffle the minibatches. - - :param manifest: manifest file. - :type manifest: list - :param batch_size: Batch size. This size is also used for generate - a random number for batch shuffle. - :type batch_size: int - :return: batch shuffled mainifest. - :rtype: list - """ - manifest.sort(key=lambda x: x["duration"]) - shift_len = self.__random__.randint(0, batch_size - 1) - batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) - self.__random__.shuffle(batch_manifest) - batch_manifest = list(sum(batch_manifest, ())) - res_len = len(manifest) - shift_len - len(batch_manifest) - batch_manifest.extend(manifest[-res_len:]) - batch_manifest.extend(manifest[0:shift_len]) - return batch_manifest - - def instance_reader_creator(self, manifest): - """ - Instance reader creator for audio data. Creat a callable function to - produce instances of data. - - Instance: a tuple of a numpy ndarray of audio spectrogram and a list of - tokenized and indexed transcription text. - - :param manifest: Filepath of manifest for audio clip files. - :type manifest: basestring - :return: Data reader function. 
- :rtype: callable - """ - - def reader(): - # extract spectrogram feature - for instance in manifest: - spectrogram = self.__audio_featurize__( - instance["audio_filepath"]) - transcript = self.__text_featurize__(instance["text"]) - yield (spectrogram, transcript) - - return reader - - def batch_reader_creator(self, - manifest_path, - batch_size, - padding_to=-1, - flatten=False, - sortagrad=False, - batch_shuffle=False): - """ - Batch data reader creator for audio data. Creat a callable function to - produce batches of data. - - Audio features will be padded with zeros to make each instance in the - batch to share the same audio feature shape. - - :param manifest_path: Filepath of manifest for audio clip files. - :type manifest_path: basestring - :param batch_size: Instance number in a batch. - :type batch_size: int - :param padding_to: If set -1, the maximun column numbers in the batch - will be used as the target size for padding. - Otherwise, `padding_to` will be the target size. - Default is -1. - :type padding_to: int - :param flatten: If set True, audio data will be flatten to be a 1-dim - ndarray. Otherwise, 2-dim ndarray. Default is False. - :type flatten: bool - :param sortagrad: Sort the audio clips by duration in the first epoc - if set True. - :type sortagrad: bool - :param batch_shuffle: Shuffle the audio clips if set True. It is - not a thorough instance-wise shuffle, but a - specific batch-wise shuffle. For more details, - please see `__batch_shuffle__` function. - :type batch_shuffle: bool - :return: Batch reader function, producing batches of data when called. - :rtype: callable - """ - - def batch_reader(): - # read manifest - manifest = self.__read_manifest__( - manifest_path=manifest_path, - max_duration=self.__max_duration__, - min_duration=self.__min_duration__) - - # sort (by duration) or shuffle manifest - if self.__epoc__ == 0 and sortagrad: - manifest.sort(key=lambda x: x["duration"]) - elif batch_shuffle: - manifest = self.__batch_shuffle__(manifest, batch_size) - - instance_reader = self.instance_reader_creator(manifest) - batch = [] - for instance in instance_reader(): - batch.append(instance) - if len(batch) == batch_size: - yield self.__padding_batch__(batch, padding_to, flatten) - batch = [] - if len(batch) > 0: - yield self.__padding_batch__(batch, padding_to, flatten) - self.__epoc__ += 1 - - return batch_reader - - def vocabulary_size(self): - """ - Get vocabulary size. - - :return: Vocabulary size. - :rtype: int - """ - return len(self.__vocab_list__) - - def vocabulary_dict(self): - """ - Get vocabulary in dict. - - :return: Vocabulary in dict. - :rtype: dict - """ - return self.__vocab_dict__ - - def vocabulary_list(self): - """ - Get vocabulary in list. - - :return: Vocabulary in list - :rtype: list - """ - return self.__vocab_list__ - - def data_name_feeding(self): - """ - Get feeddings (data field name and corresponding field id). - - :return: Feeding dict. 
- :rtype: dict - """ - feeding = { - "audio_spectrogram": 0, - "transcript_text": 1, - } - return feeding diff --git a/compute_mean_std.py b/compute_mean_std.py new file mode 100755 index 000000000..b3015df73 --- /dev/null +++ b/compute_mean_std.py @@ -0,0 +1,56 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +from data_utils.normalizer import FeatureNormalizer +from data_utils.augmentor.augmentation import AugmentationPipeline +from data_utils.featurizer.audio_featurizer import AudioFeaturizer + +parser = argparse.ArgumentParser( + description='Computing mean and stddev for feature normalizer.') +parser.add_argument( + "--manifest_path", + default='datasets/manifest.train', + type=str, + help="Manifest path for computing normalizer's mean and stddev." + "(default: %(default)s)") +parser.add_argument( + "--num_samples", + default=500, + type=int, + help="Number of samples for computing mean and stddev. " + "(default: %(default)s)") +parser.add_argument( + "--augmentation_config", + default='{}', + type=str, + help="Augmentation configuration in json-format. " + "(default: %(default)s)") +parser.add_argument( + "--output_file", + default='mean_std.npz', + type=str, + help="Filepath to write mean and std to (.npz)." + "(default: %(default)s)") +args = parser.parse_args() + + +def main(): + augmentation_pipeline = AugmentationPipeline(args.augmentation_config) + audio_featurizer = AudioFeaturizer() + + def augment_and_featurize(audio_segment): + augmentation_pipeline.transform_audio(audio_segment) + return audio_featurizer.featurize(audio_segment) + + normalizer = FeatureNormalizer( + mean_std_filepath=None, + manifest_path=args.manifest_path, + featurize_func=augment_and_featurize, + num_samples=args.num_samples) + normalizer.write_to_file(args.output_file) + + +if __name__ == '__main__': + main() diff --git a/data_utils/__init__.py b/data_utils/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/data_utils/audio.py b/data_utils/audio.py new file mode 100755 index 000000000..46b241201 --- /dev/null +++ b/data_utils/audio.py @@ -0,0 +1,68 @@ +import numpy as np +import io +import soundfile + + +class AudioSegment(object): + """Monaural audio segment abstraction. + """ + + def __init__(self, samples, sample_rate): + if not samples.dtype == np.float32: + raise ValueError("Sample data type of [%s] is not supported.") + self._samples = samples + self._sample_rate = sample_rate + if self._samples.ndim >= 2: + self._samples = np.mean(self._samples, 1) + + @classmethod + def from_file(cls, filepath): + samples, sample_rate = soundfile.read(filepath, dtype='float32') + return cls(samples, sample_rate) + + @classmethod + def from_bytes(cls, bytes): + samples, sample_rate = soundfile.read( + io.BytesIO(bytes), dtype='float32') + return cls(samples, sample_rate) + + def apply_gain(self, gain): + self.samples *= 10.**(gain / 20.) 
+ + def resample(self, target_sample_rate): + raise NotImplementedError() + + def change_speed(self, rate): + raise NotImplementedError() + + @property + def samples(self): + return self._samples.copy() + + @property + def sample_rate(self): + return self._sample_rate + + @property + def duration(self): + return self._samples.shape[0] / float(self._sample_rate) + + +class SpeechSegment(AudioSegment): + def __init__(self, samples, sample_rate, transcript): + AudioSegment.__init__(self, samples, sample_rate) + self._transcript = transcript + + @classmethod + def from_file(cls, filepath, transcript): + audio = AudioSegment.from_file(filepath) + return cls(audio.samples, audio.sample_rate, transcript) + + @classmethod + def from_bytes(cls, bytes, transcript): + audio = AudioSegment.from_bytes(bytes) + return cls(audio.samples, audio.sample_rate, transcript) + + @property + def transcript(self): + return self._transcript diff --git a/data_utils/augmentor/__init__.py b/data_utils/augmentor/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py new file mode 100755 index 000000000..3a1426a1f --- /dev/null +++ b/data_utils/augmentor/augmentation.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import random +from data_utils.augmentor.volumn_perturb import VolumnPerturbAugmentor + + +class AugmentationPipeline(object): + def __init__(self, augmentation_config, random_seed=0): + self._rng = random.Random(random_seed) + self._augmentors, self._rates = self._parse_pipeline_from( + augmentation_config) + + def transform_audio(self, audio_segment): + for augmentor, rate in zip(self._augmentors, self._rates): + if self._rng.uniform(0., 1.) <= rate: + augmentor.transform_audio(audio_segment) + + def _parse_pipeline_from(self, config_json): + try: + configs = json.loads(config_json) + except Exception as e: + raise ValueError("Augmentation config json format error: " + "%s" % str(e)) + augmentors = [ + self._get_augmentor(config["type"], config["params"]) + for config in configs + ] + rates = [config["rate"] for config in configs] + return augmentors, rates + + def _get_augmentor(self, augmentor_type, params): + if augmentor_type == "volumn": + return VolumnPerturbAugmentor(self._rng, **params) + else: + raise ValueError("Unknown augmentor type [%s]." 
% augmentor_type) diff --git a/data_utils/augmentor/base.py b/data_utils/augmentor/base.py new file mode 100755 index 000000000..e801b9b18 --- /dev/null +++ b/data_utils/augmentor/base.py @@ -0,0 +1,17 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from abc import ABCMeta, abstractmethod + + +class AugmentorBase(object): + __metaclass__ = ABCMeta + + @abstractmethod + def __init__(self): + pass + + @abstractmethod + def transform_audio(self, audio_segment): + pass diff --git a/data_utils/augmentor/volumn_perturb.py b/data_utils/augmentor/volumn_perturb.py new file mode 100755 index 000000000..dd1ba53a7 --- /dev/null +++ b/data_utils/augmentor/volumn_perturb.py @@ -0,0 +1,17 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random +from data_utils.augmentor.base import AugmentorBase + + +class VolumnPerturbAugmentor(AugmentorBase): + def __init__(self, rng, min_gain_dBFS, max_gain_dBFS): + self._min_gain_dBFS = min_gain_dBFS + self._max_gain_dBFS = max_gain_dBFS + self._rng = rng + + def transform_audio(self, audio_segment): + gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS) + audio_segment.apply_gain(gain) diff --git a/data_utils/data.py b/data_utils/data.py new file mode 100644 index 000000000..630007932 --- /dev/null +++ b/data_utils/data.py @@ -0,0 +1,247 @@ +""" + Providing basic audio data preprocessing pipeline, and offering + both instance-level and batch-level data reader interfaces. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random +import numpy as np +import paddle.v2 as paddle +from data_utils import utils +from data_utils.augmentor.augmentation import AugmentationPipeline +from data_utils.featurizer.speech_featurizer import SpeechFeaturizer +from data_utils.audio import SpeechSegment +from data_utils.normalizer import FeatureNormalizer + + +class DataGenerator(object): + """ + DataGenerator provides basic audio data preprocessing pipeline, and offers + both instance-level and batch-level data reader interfaces. + Normalized FFT are used as audio features here. + + :param vocab_filepath: Vocabulary file path for indexing tokenized + transcriptions. + :type vocab_filepath: basestring + :param normalizer_manifest_path: Manifest filepath for collecting feature + normalization statistics, e.g. mean, std. + :type normalizer_manifest_path: basestring + :param normalizer_num_samples: Number of instances sampled for collecting + feature normalization statistics. + Default is 100. + :type normalizer_num_samples: int + :param max_duration: Audio clips with duration (in seconds) greater than + this will be discarded. Default is 20.0. + :type max_duration: float + :param min_duration: Audio clips with duration (in seconds) smaller than + this will be discarded. Default is 0.0. + :type min_duration: float + :param stride_ms: Striding size (in milliseconds) for generating frames. + Default is 10.0. + :type stride_ms: float + :param window_ms: Window size (in milliseconds) for frames. Default is 20.0. + :type window_ms: float + :param max_frequency: Maximun frequency for FFT features. FFT features of + frequency larger than this will be discarded. + If set None, all features will be kept. + Default is None. 
+ :type max_frequency: float + """ + + def __init__(self, + vocab_filepath, + mean_std_filepath, + augmentation_config='{}', + max_duration=float('inf'), + min_duration=0.0, + stride_ms=10.0, + window_ms=20.0, + max_freq=None, + random_seed=0): + self._max_duration = max_duration + self._min_duration = min_duration + self._normalizer = FeatureNormalizer(mean_std_filepath) + self._augmentation_pipeline = AugmentationPipeline( + augmentation_config=augmentation_config, random_seed=random_seed) + self._speech_featurizer = SpeechFeaturizer( + vocab_filepath=vocab_filepath, + stride_ms=stride_ms, + window_ms=window_ms, + max_freq=max_freq, + random_seed=random_seed) + self._rng = random.Random(random_seed) + self._epoch = 0 + + def batch_reader_creator(self, + manifest_path, + batch_size, + padding_to=-1, + flatten=False, + sortagrad=False, + batch_shuffle=False): + """ + Batch data reader creator for audio data. Creat a callable function to + produce batches of data. + + Audio features will be padded with zeros to make each instance in the + batch to share the same audio feature shape. + + :param manifest_path: Filepath of manifest for audio clip files. + :type manifest_path: basestring + :param batch_size: Instance number in a batch. + :type batch_size: int + :param padding_to: If set -1, the maximun column numbers in the batch + will be used as the target size for padding. + Otherwise, `padding_to` will be the target size. + Default is -1. + :type padding_to: int + :param flatten: If set True, audio data will be flatten to be a 1-dim + ndarray. Otherwise, 2-dim ndarray. Default is False. + :type flatten: bool + :param sortagrad: Sort the audio clips by duration in the first epoc + if set True. + :type sortagrad: bool + :param batch_shuffle: Shuffle the audio clips if set True. It is + not a thorough instance-wise shuffle, but a + specific batch-wise shuffle. For more details, + please see `_batch_shuffle` function. + :type batch_shuffle: bool + :return: Batch reader function, producing batches of data when called. 
+ :rtype: callable + """ + + def batch_reader(): + # read manifest + manifest = utils.read_manifest( + manifest_path=manifest_path, + max_duration=self._max_duration, + min_duration=self._min_duration) + # sort (by duration) or batch-wise shuffle the manifest + if self._epoch == 0 and sortagrad: + manifest.sort(key=lambda x: x["duration"]) + elif batch_shuffle: + manifest = self._batch_shuffle(manifest, batch_size) + # prepare batches + instance_reader = self._instance_reader_creator(manifest) + batch = [] + for instance in instance_reader(): + batch.append(instance) + if len(batch) == batch_size: + yield self._padding_batch(batch, padding_to, flatten) + batch = [] + if len(batch) > 0: + yield self._padding_batch(batch, padding_to, flatten) + self._epoch += 1 + + return batch_reader + + @property + def feeding(self): + """Returns data_reader's feeding dict.""" + return {"audio_spectrogram": 0, "transcript_text": 1} + + @property + def vocab_size(self): + """Returns vocabulary size.""" + return self._speech_featurizer.vocab_size + + @property + def vocab_list(self): + """Returns vocabulary list.""" + return self._speech_featurizer.vocab_list + + def _process_utterance(self, filename, transcript): + speech_segment = SpeechSegment.from_file(filename, transcript) + self._augmentation_pipeline.transform_audio(speech_segment) + specgram, text_ids = self._speech_featurizer.featurize(speech_segment) + specgram = self._normalizer.apply(specgram) + return specgram, text_ids + + def _instance_reader_creator(self, manifest): + """ + Instance reader creator for audio data. Creat a callable function to + produce instances of data. + + Instance: a tuple of a numpy ndarray of audio spectrogram and a list of + tokenized and indexed transcription text. + + :param manifest: Filepath of manifest for audio clip files. + :type manifest: basestring + :return: Data reader function. + :rtype: callable + """ + + def reader(): + for instance in manifest: + yield self._process_utterance(instance["audio_filepath"], + instance["text"]) + + return reader + + def _padding_batch(self, batch, padding_to=-1, flatten=False): + """ + Padding audio part of features (only in the time axis -- column axis) + with zeros, to make each instance in the batch share the same + audio feature shape. + + If `padding_to` is set -1, the maximun column numbers in the batch will + be used as the target size. Otherwise, `padding_to` will be the target + size. Default is -1. + + If `flatten` is set True, audio data will be flatten to be a 1-dim + ndarray. Default is False. + """ + new_batch = [] + # get target shape + max_length = max([audio.shape[1] for audio, text in batch]) + if padding_to != -1: + if padding_to < max_length: + raise ValueError("If padding_to is not -1, it should be greater" + " or equal to the original instance length.") + max_length = padding_to + # padding + for audio, text in batch: + padded_audio = np.zeros([audio.shape[0], max_length]) + padded_audio[:, :audio.shape[1]] = audio + if flatten: + padded_audio = padded_audio.flatten() + new_batch.append((padded_audio, text)) + return new_batch + + def _batch_shuffle(self, manifest, batch_size): + """ + The instances have different lengths and they cannot be + combined into a single matrix multiplication. It usually + sorts the training examples by length and combines only + similarly-sized instances into minibatches, pads with + silence when necessary so that all instances in a batch + have the same length. 
This batch shuffle fuction is used + to make similarly-sized instances into minibatches and + make a batch-wise shuffle. + + 1. Sort the audio clips by duration. + 2. Generate a random number `k`, k in [0, batch_size). + 3. Randomly remove `k` instances in order to make different mini-batches, + then make minibatches and each minibatch size is batch_size. + 4. Shuffle the minibatches. + + :param manifest: manifest file. + :type manifest: list + :param batch_size: Batch size. This size is also used for generate + a random number for batch shuffle. + :type batch_size: int + :return: batch shuffled mainifest. + :rtype: list + """ + manifest.sort(key=lambda x: x["duration"]) + shift_len = self._rng.randint(0, batch_size - 1) + batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) + self._rng.shuffle(batch_manifest) + batch_manifest = list(sum(batch_manifest, ())) + res_len = len(manifest) - shift_len - len(batch_manifest) + batch_manifest.extend(manifest[-res_len:]) + batch_manifest.extend(manifest[0:shift_len]) + return batch_manifest diff --git a/data_utils/featurizer/__init__.py b/data_utils/featurizer/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py new file mode 100755 index 000000000..5d9c68836 --- /dev/null +++ b/data_utils/featurizer/audio_featurizer.py @@ -0,0 +1,86 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import random +from data_utils import utils +from data_utils.audio import AudioSegment + + +class AudioFeaturizer(object): + def __init__(self, + specgram_type='linear', + stride_ms=10.0, + window_ms=20.0, + max_freq=None, + random_seed=0): + self._specgram_type = specgram_type + self._stride_ms = stride_ms + self._window_ms = window_ms + self._max_freq = max_freq + + def featurize(self, audio_segment): + return self._compute_specgram(audio_segment.samples, + audio_segment.sample_rate) + + def _compute_specgram(self, samples, sample_rate): + if self._specgram_type == 'linear': + return self._compute_linear_specgram( + samples, sample_rate, self._stride_ms, self._window_ms, + self._max_freq) + else: + raise ValueError("Unknown specgram_type %s. " + "Supported values: linear." % self._specgram_type) + + def _compute_linear_specgram(self, + samples, + sample_rate, + stride_ms=10.0, + window_ms=20.0, + max_freq=None, + eps=1e-14): + """Laod audio data and calculate the log of spectrogram by FFT. + Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech + """ + if max_freq is None: + max_freq = sample_rate / 2 + if max_freq > sample_rate / 2: + raise ValueError("max_freq must be greater than half of " + "sample rate.") + if stride_ms > window_ms: + raise ValueError("Stride size must not be greater than " + "window size.") + stride_size = int(0.001 * sample_rate * stride_ms) + window_size = int(0.001 * sample_rate * window_ms) + specgram, freqs = self._specgram_real( + samples, + window_size=window_size, + stride_size=stride_size, + sample_rate=sample_rate) + ind = np.where(freqs <= max_freq)[0][-1] + 1 + return np.log(specgram[:ind, :] + eps) + + def _specgram_real(self, samples, window_size, stride_size, sample_rate): + """Compute the spectrogram by FFT for a discrete real signal. 
+ Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech + """ + # extract strided windows + truncate_size = (len(samples) - window_size) % stride_size + samples = samples[:len(samples) - truncate_size] + nshape = (window_size, (len(samples) - window_size) // stride_size + 1) + nstrides = (samples.strides[0], samples.strides[0] * stride_size) + windows = np.lib.stride_tricks.as_strided( + samples, shape=nshape, strides=nstrides) + assert np.all( + windows[:, 1] == samples[stride_size:(stride_size + window_size)]) + # window weighting, squared Fast Fourier Transform (fft), scaling + weighting = np.hanning(window_size)[:, None] + fft = np.fft.rfft(windows * weighting, axis=0) + fft = np.absolute(fft)**2 + scale = np.sum(weighting**2) * sample_rate + fft[1:-1, :] *= (2.0 / scale) + fft[(0, -1), :] /= scale + # prepare fft frequency list + freqs = float(sample_rate) / window_size * np.arange(fft.shape[0]) + return fft, freqs diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py new file mode 100755 index 000000000..06af7a026 --- /dev/null +++ b/data_utils/featurizer/speech_featurizer.py @@ -0,0 +1,32 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.featurizer.audio_featurizer import AudioFeaturizer +from data_utils.featurizer.text_featurizer import TextFeaturizer + + +class SpeechFeaturizer(object): + def __init__(self, + vocab_filepath, + specgram_type='linear', + stride_ms=10.0, + window_ms=20.0, + max_freq=None, + random_seed=0): + self._audio_featurizer = AudioFeaturizer( + specgram_type, stride_ms, window_ms, max_freq, random_seed) + self._text_featurizer = TextFeaturizer(vocab_filepath) + + def featurize(self, speech_segment): + audio_feature = self._audio_featurizer.featurize(speech_segment) + text_ids = self._text_featurizer.text2ids(speech_segment.transcript) + return audio_feature, text_ids + + @property + def vocab_size(self): + return self._text_featurizer.vocab_size + + @property + def vocab_list(self): + return self._text_featurizer.vocab_list diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py new file mode 100755 index 000000000..7e4b69d7b --- /dev/null +++ b/data_utils/featurizer/text_featurizer.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + + +class TextFeaturizer(object): + def __init__(self, vocab_filepath): + self._vocab_dict, self._vocab_list = self._load_vocabulary_from_file( + vocab_filepath) + + def text2ids(self, text): + tokens = self._char_tokenize(text) + return [self._vocab_dict[token] for token in tokens] + + def ids2text(self, ids): + return ''.join([self._vocab_list[id] for id in ids]) + + @property + def vocab_size(self): + return len(self._vocab_list) + + @property + def vocab_list(self): + return self._vocab_list + + def _char_tokenize(self, text): + return list(text.strip()) + + def _load_vocabulary_from_file(self, vocab_filepath): + """Load vocabulary from file.""" + vocab_lines = [] + with open(vocab_filepath, 'r') as file: + vocab_lines.extend(file.readlines()) + vocab_list = [line[:-1] for line in vocab_lines] + vocab_dict = dict( + [(token, id) for (id, token) in enumerate(vocab_list)]) + return vocab_dict, vocab_list diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py new file mode 100755 index 000000000..364600af8 --- /dev/null +++ 
b/data_utils/normalizer.py @@ -0,0 +1,49 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import random +import data_utils.utils as utils +from data_utils.audio import AudioSegment + + +class FeatureNormalizer(object): + def __init__(self, + mean_std_filepath, + manifest_path=None, + featurize_func=None, + num_samples=500, + random_seed=0): + if not mean_std_filepath: + if not (manifest_path and featurize_func): + raise ValueError("If mean_std_filepath is None, meanifest_path " + "and featurize_func should not be None.") + self._rng = random.Random(random_seed) + self._compute_mean_std(manifest_path, featurize_func, num_samples) + else: + self._read_mean_std_from_file(mean_std_filepath) + + def apply(self, features, eps=1e-14): + """Normalize features to be of zero mean and unit stddev.""" + return (features - self._mean) / (self._std + eps) + + def write_to_file(self, filepath): + np.savez(filepath, mean=self._mean, std=self._std) + + def _read_mean_std_from_file(self, filepath): + npzfile = np.load(filepath) + self._mean = npzfile["mean"] + self._std = npzfile["std"] + + def _compute_mean_std(self, manifest_path, featurize_func, num_samples): + manifest = utils.read_manifest(manifest_path) + sampled_manifest = self._rng.sample(manifest, num_samples) + features = [] + for instance in sampled_manifest: + features.append( + featurize_func( + AudioSegment.from_file(instance["audio_filepath"]))) + features = np.hstack(features) + self._mean = np.mean(features, axis=1).reshape([-1, 1]) + self._std = np.std(features, axis=1).reshape([-1, 1]) diff --git a/data_utils/utils.py b/data_utils/utils.py new file mode 100755 index 000000000..2a916b54f --- /dev/null +++ b/data_utils/utils.py @@ -0,0 +1,19 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json + + +def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): + """Load and parse manifest file.""" + manifest = [] + for json_line in open(manifest_path): + try: + json_data = json.loads(json_line) + except Exception as e: + raise IOError("Error reading manifest: %s" % str(e)) + if (json_data["duration"] <= max_duration and + json_data["duration"] >= min_duration): + manifest.append(json_data) + return manifest diff --git a/data/librispeech.py b/datasets/librispeech/librispeech.py similarity index 99% rename from data/librispeech.py rename to datasets/librispeech/librispeech.py index 653caa926..1ba2a4422 100644 --- a/data/librispeech.py +++ b/datasets/librispeech/librispeech.py @@ -44,7 +44,7 @@ parser.add_argument( help="Directory to save the dataset. (default: %(default)s)") parser.add_argument( "--manifest_prefix", - default="manifest.libri", + default="manifest", type=str, help="Filepath prefix for output manifests. (default: %(default)s)") parser.add_argument( diff --git a/datasets/run_all.sh b/datasets/run_all.sh new file mode 100755 index 000000000..ef2b721fb --- /dev/null +++ b/datasets/run_all.sh @@ -0,0 +1,13 @@ +cd librispeech +python librispeech.py +if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." + exit 1 +fi +cd - + +cat librispeech/manifest.train* | shuf > manifest.train +cat librispeech/manifest.dev-clean > manifest.dev +cat librispeech/manifest.test-clean > manifest.test + +echo "All done." 
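For readers following the refactor, the sketch below shows one way the new pieces are intended to be combined before training: prepare the manifests with `datasets/run_all.sh`, write the normalizer statistics with `compute_mean_std.py`, then build a `DataGenerator` and a batch reader. The call signatures come from the files added in this patch, and the manifest/vocabulary paths simply reuse the defaults shown above; treat it as an assumed usage outline, not code that ships with the patch.

```python
# Assumed workflow sketch (not part of the patch):
#   sh datasets/run_all.sh        # writes datasets/manifest.{train,dev,test}
#   python compute_mean_std.py    # samples the train manifest, writes mean_std.npz
from data_utils.data import DataGenerator

data_generator = DataGenerator(
    vocab_filepath='datasets/vocab/eng_vocab.txt',
    mean_std_filepath='mean_std.npz',
    augmentation_config='{}')   # empty pipeline; see AugmentationPipeline for the json format

train_batch_reader = data_generator.batch_reader_creator(
    manifest_path='datasets/manifest.train',
    batch_size=32,
    sortagrad=True,        # sort clips by duration in the first epoch only
    batch_shuffle=True)    # batch-wise shuffle in later epochs

for padded_audio, token_ids in train_batch_reader().next():
    # each instance is a zero-padded spectrogram and its indexed transcript
    break
```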
diff --git a/data/eng_vocab.txt b/datasets/vocab/eng_vocab.txt similarity index 100% rename from data/eng_vocab.txt rename to datasets/vocab/eng_vocab.txt diff --git a/infer.py b/infer.py index 598c348b0..eb31254ce 100644 --- a/infer.py +++ b/infer.py @@ -2,11 +2,15 @@ Inference for a simplifed version of Baidu DeepSpeech2 model. """ -import paddle.v2 as paddle -import distutils.util +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import argparse import gzip -from audio_data_utils import DataGenerator +import distutils.util +import paddle.v2 as paddle +from data_utils.data import DataGenerator from model import deep_speech2 from decoder import ctc_decode @@ -38,13 +42,13 @@ parser.add_argument( type=distutils.util.strtobool, help="Use gpu or not. (default: %(default)s)") parser.add_argument( - "--normalizer_manifest_path", - default='data/manifest.libri.train-clean-100', + "--mean_std_filepath", + default='mean_std.npz', type=str, help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", - default='data/manifest.libri.test-clean', + default='datasets/manifest.test', type=str, help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( @@ -54,7 +58,7 @@ parser.add_argument( help="Model filepath. (default: %(default)s)") parser.add_argument( "--vocab_filepath", - default='data/eng_vocab.txt', + default='datasets/vocab/eng_vocab.txt', type=str, help="Vocabulary filepath. (default: %(default)s)") args = parser.parse_args() @@ -67,28 +71,22 @@ def infer(): # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, - normalizer_manifest_path=args.normalizer_manifest_path, - normalizer_num_samples=200, - max_duration=20.0, - min_duration=0.0, - stride_ms=10, - window_ms=20) + mean_std_filepath=args.mean_std_filepath, + augmentation_config='{}') # create network config - dict_size = data_generator.vocabulary_size() - vocab_list = data_generator.vocabulary_list() + # paddle.data_type.dense_array is used for variable batch input. + # The size 161 * 161 is only an placeholder value and the real shape + # of input batch data will be induced during training. 
audio_data = paddle.layer.data( - name="audio_spectrogram", - height=161, - width=2000, - type=paddle.data_type.dense_vector(322000)) + name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) text_data = paddle.layer.data( name="transcript_text", - type=paddle.data_type.integer_value_sequence(dict_size)) + type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) output_probs = deep_speech2( audio_data=audio_data, text_data=text_data, - dict_size=dict_size, + dict_size=data_generator.vocab_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_size=args.rnn_layer_size, @@ -99,31 +97,30 @@ def infer(): gzip.open(args.model_filepath)) # prepare infer data - feeding = data_generator.data_name_feeding() - test_batch_reader = data_generator.batch_reader_creator( + batch_reader = data_generator.batch_reader_creator( manifest_path=args.decode_manifest_path, batch_size=args.num_samples, - padding_to=2000, - flatten=True, - sort_by_duration=False, - shuffle=False) - infer_data = test_batch_reader().next() + sortagrad=False, + batch_shuffle=False) + infer_data = batch_reader().next() # run inference infer_results = paddle.infer( output_layer=output_probs, parameters=parameters, input=infer_data) - num_steps = len(infer_results) / len(infer_data) + num_steps = len(infer_results) // len(infer_data) probs_split = [ infer_results[i * num_steps:(i + 1) * num_steps] - for i in xrange(0, len(infer_data)) + for i in xrange(len(infer_data)) ] # decode and print for i, probs in enumerate(probs_split): output_transcription = ctc_decode( - probs_seq=probs, vocabulary=vocab_list, method="best_path") + probs_seq=probs, + vocabulary=data_generator.vocab_list, + method="best_path") target_transcription = ''.join( - [vocab_list[index] for index in infer_data[i][1]]) + [data_generator.vocab_list[index] for index in infer_data[i][1]]) print("Target Transcription: %s \nOutput Transcription: %s \n" % (target_transcription, output_transcription)) diff --git a/train.py b/train.py index 957c24267..c6aa97527 100644 --- a/train.py +++ b/train.py @@ -2,21 +2,21 @@ Trainer for a simplifed version of Baidu DeepSpeech2 model. """ -import paddle.v2 as paddle -import distutils.util +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import os import argparse import gzip import time -import sys +import distutils.util +import paddle.v2 as paddle from model import deep_speech2 -from audio_data_utils import DataGenerator -import numpy as np -import os +from data_utils.data import DataGenerator -#TODO: add WER metric - -parser = argparse.ArgumentParser( - description='Simplified version of DeepSpeech2 trainer.') +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--batch_size", default=32, type=int, help="Minibatch size.") parser.add_argument( @@ -51,7 +51,7 @@ parser.add_argument( help="Use gpu or not. (default: %(default)s)") parser.add_argument( "--use_sortagrad", - default=False, + default=True, type=distutils.util.strtobool, help="Use sortagrad or not. (default: %(default)s)") parser.add_argument( @@ -60,23 +60,23 @@ parser.add_argument( type=int, help="Trainer number. (default: %(default)s)") parser.add_argument( - "--normalizer_manifest_path", - default='data/manifest.libri.train-clean-100', + "--mean_std_filepath", + default='mean_std.npz', type=str, help="Manifest path for normalizer. 
(default: %(default)s)") parser.add_argument( "--train_manifest_path", - default='data/manifest.libri.train-clean-100', + default='datasets/manifest.train', type=str, help="Manifest path for training. (default: %(default)s)") parser.add_argument( "--dev_manifest_path", - default='data/manifest.libri.dev-clean', + default='datasets/manifest.dev', type=str, help="Manifest path for validation. (default: %(default)s)") parser.add_argument( "--vocab_filepath", - default='data/eng_vocab.txt', + default='datasets/vocab/eng_vocab.txt', type=str, help="Vocabulary filepath. (default: %(default)s)") parser.add_argument( @@ -86,6 +86,12 @@ parser.add_argument( help="If set None, the training will start from scratch. " "Otherwise, the training will resume from " "the existing model of this path. (default: %(default)s)") +parser.add_argument( + "--augmentation_config", + default='{}', + type=str, + help="Augmentation configuration in json-format. " + "(default: %(default)s)") args = parser.parse_args() @@ -98,29 +104,26 @@ def train(): def data_generator(): return DataGenerator( vocab_filepath=args.vocab_filepath, - normalizer_manifest_path=args.normalizer_manifest_path, - normalizer_num_samples=200, - max_duration=20.0, - min_duration=0.0, - stride_ms=10, - window_ms=20) + mean_std_filepath=args.mean_std_filepath, + augmentation_config=args.augmentation_config) train_generator = data_generator() test_generator = data_generator() + # create network config - dict_size = train_generator.vocabulary_size() # paddle.data_type.dense_array is used for variable batch input. - # the size 161 * 161 is only an placeholder value and the real shape - # of input batch data will be set at each batch. + # The size 161 * 161 is only an placeholder value and the real shape + # of input batch data will be induced during training. 
audio_data = paddle.layer.data( name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) text_data = paddle.layer.data( name="transcript_text", - type=paddle.data_type.integer_value_sequence(dict_size)) + type=paddle.data_type.integer_value_sequence( + train_generator.vocab_size)) cost = deep_speech2( audio_data=audio_data, text_data=text_data, - dict_size=dict_size, + dict_size=train_generator.vocab_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_size=args.rnn_layer_size, @@ -143,13 +146,13 @@ def train(): train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest_path, batch_size=args.batch_size, - sortagrad=True if args.init_model_path is None else False, + sortagrad=args.use_sortagrad if args.init_model_path is None else False, batch_shuffle=True) test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, + sortagrad=False, batch_shuffle=False) - feeding = train_generator.data_name_feeding() # create event handler def event_handler(event): @@ -158,8 +161,8 @@ def train(): cost_sum += event.cost cost_counter += 1 if event.batch_id % 50 == 0: - print "\nPass: %d, Batch: %d, TrainCost: %f" % ( - event.pass_id, event.batch_id, cost_sum / cost_counter) + print("\nPass: %d, Batch: %d, TrainCost: %f" % + (event.pass_id, event.batch_id, cost_sum / cost_counter)) cost_sum, cost_counter = 0.0, 0 with gzip.open("params.tar.gz", 'w') as f: parameters.to_tar(f) @@ -170,16 +173,17 @@ def train(): start_time = time.time() cost_sum, cost_counter = 0.0, 0 if isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=test_batch_reader, feeding=feeding) - print "\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % ( - time.time() - start_time, event.pass_id, result.cost) + result = trainer.test( + reader=test_batch_reader, feeding=test_generator.feeding) + print("\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % + (time.time() - start_time, event.pass_id, result.cost)) # run train trainer.train( reader=train_batch_reader, event_handler=event_handler, num_passes=args.num_passes, - feeding=feeding) + feeding=train_generator.feeding) def main(): From bc3224eb140082d6ba286accebe85c1d019e6e8f Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 12 Jun 2017 12:51:01 +0800 Subject: [PATCH 032/335] Follow comments. --- error_rate.py | 94 ++++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/error_rate.py b/error_rate.py index f216177e0..2bb637114 100644 --- a/error_rate.py +++ b/error_rate.py @@ -1,4 +1,9 @@ -# -- * -- coding: utf-8 -- * -- +# -*- coding: utf-8 -*- +""" + This module provides functions to calculate error rate in different level. + e.g. wer for word-level, cer for char-level. +""" + import numpy as np @@ -14,9 +19,9 @@ def levenshtein_distance(ref, hyp): if hyp_len == 0: return ref_len - distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int64) + distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int32) - # initialization distance matrix + # initialize distance matrix for j in xrange(hyp_len + 1): distance[0][j] = j for i in xrange(ref_len + 1): @@ -36,11 +41,10 @@ def levenshtein_distance(ref, hyp): return distance[ref_len][hyp_len] -def wer(reference, hypophysis, delimiter=' ', filter_none=True): +def wer(reference, hypothesis, ignore_case=False, delimiter=' '): """ - Calculate word error rate (WER). 
WER is a popular evaluation metric used - in speech recognition. It compares a reference with an hypophysis and - is defined like this: + Calculate word error rate (WER). WER compares reference text and + hypothesis text in word-level. WER is defined as: .. math:: WER = (Sw + Dw + Iw) / Nw @@ -54,41 +58,39 @@ def wer(reference, hypophysis, delimiter=' ', filter_none=True): Iw is the number of words inserted, Nw is the number of words in the reference - We can use levenshtein distance to calculate WER. Please draw an attention - that this function will truncate the beginning and ending delimiter for - reference and hypophysis sentences before calculating WER. + We can use levenshtein distance to calculate WER. Please draw an attention that + empty items will be removed when splitting sentences by delimiter. :param reference: The reference sentence. - :type reference: str - :param hypophysis: The hypophysis sentence. - :type reference: str + :type reference: basestring + :param hypothesis: The hypothesis sentence. + :type hypothesis: basestring + :param ignore_case: Whether case-sensitive or not. + :type ignore_case: bool :param delimiter: Delimiter of input sentences. :type delimiter: char - :param filter_none: Whether to remove None value when splitting sentence. - :type filter_none: bool - :return: WER + :return: Word error rate. :rtype: float """ + if ignore_case == True: + reference = reference.lower() + hypothesis = hypothesis.lower() - if len(reference.strip(delimiter)) == 0: - raise ValueError("Reference's word number should be greater than 0.") + ref_words = filter(None, reference.split(delimiter)) + hyp_words = filter(None, hypothesis.split(delimiter)) - if filter_none == True: - ref_words = filter(None, reference.strip(delimiter).split(delimiter)) - hyp_words = filter(None, hypophysis.strip(delimiter).split(delimiter)) - else: - ref_words = reference.strip(delimiter).split(delimiter) - hyp_words = reference.strip(delimiter).split(delimiter) + if len(ref_words) == 0: + raise ValueError("Reference's word number should be greater than 0.") edit_distance = levenshtein_distance(ref_words, hyp_words) wer = float(edit_distance) / len(ref_words) return wer -def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): +def cer(reference, hypothesis, ignore_case=False): """ - Calculate charactor error rate (CER). CER will compare reference text and - hypophysis text in char-level. CER is defined as: + Calculate charactor error rate (CER). CER compares reference text and + hypothesis text in char-level. CER is defined as: .. math:: CER = (Sc + Dc + Ic) / Nc @@ -97,41 +99,35 @@ def cer(reference, hypophysis, squeeze=True, ignore_case=False, strip_char=''): .. code-block:: text - Sc is the number of character substituted, - Dc is the number of deleted, - Ic is the number of inserted + Sc is the number of characters substituted, + Dc is the number of characters deleted, + Ic is the number of characters inserted Nc is the number of characters in the reference We can use levenshtein distance to calculate CER. Chinese input should be - encoded to unicode. + encoded to unicode. Please draw an attention that the leading and tailing + white space characters will be truncated and multiple consecutive white + space characters in a sentence will be replaced by one white space character. :param reference: The reference sentence. - :type reference: str - :param hypophysis: The hypophysis sentence. 
- :type reference: str - :param squeeze: If set true, consecutive space character - will be squeezed to one - :type squeeze: bool + :type reference: basestring + :param hypothesis: The hypothesis sentence. + :type hypothesis: basestring :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool - :param strip_char: If not set to '', strip_char in beginning and ending of - sentence will be truncated. - :type strip_char: char - :return: CER + :return: Character error rate. :rtype: float """ if ignore_case == True: reference = reference.lower() - hypophysis = hypophysis.lower() - if strip_char != '': - reference = reference.strip(strip_char) - hypophysis = hypophysis.strip(strip_char) - if squeeze == True: - reference = ' '.join(filter(None, reference.split(' '))) - hypophysis = ' '.join(filter(None, hypophysis.split(' '))) + hypothesis = hypothesis.lower() + + reference = ' '.join(filter(None, reference.split(' '))) + hypothesis = ' '.join(filter(None, hypothesis.split(' '))) if len(reference) == 0: raise ValueError("Length of reference should be greater than 0.") - edit_distance = levenshtein_distance(reference, hypophysis) + + edit_distance = levenshtein_distance(reference, hypothesis) cer = float(edit_distance) / len(reference) return cer From 7db13ca9dbec998d5fff8f69c5fb5ec3d546352f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 13 Jun 2017 14:16:54 +0800 Subject: [PATCH 033/335] enable lm in multiprocessing decoder & add script for params tuning --- decoder.py | 23 ++++-- infer.py | 9 ++- tune.py | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 257 insertions(+), 9 deletions(-) create mode 100644 tune.py diff --git a/decoder.py b/decoder.py index 96e911811..824ac9701 100755 --- a/decoder.py +++ b/decoder.py @@ -73,7 +73,7 @@ class Scorer(object): return len(words) # execute evaluation - def evaluate(self, sentence): + def __call__(self, sentence): lm = self.language_model_score(sentence) word_cnt = self.word_count(sentence) score = np.power(lm, self._alpha) \ @@ -84,8 +84,9 @@ class Scorer(object): def ctc_beam_search_decoder(probs_seq, beam_size, vocabulary, + blank_id=0, ext_scoring_func=None, - blank_id=0): + nproc=False): ''' Beam search decoder for CTC-trained network, using beam search with width beam_size to find many paths to one label, return beam_size labels in @@ -107,6 +108,8 @@ def ctc_beam_search_decoder(probs_seq, :type external_scoring_function: function :param blank_id: id of blank, default 0. :type blank_id: int + :param nproc: Whether the decoder used in multiprocesses. + :type nproc: bool :return: Decoding log probability and result string. :rtype: list @@ -122,6 +125,12 @@ def ctc_beam_search_decoder(probs_seq, if not blank_id < probs_dim: raise ValueError("blank_id shouldn't be greater than probs dimension") + # If the decoder called in the multiprocesses, then use the global scorer + # instantiated in ctc_beam_search_decoder_nproc(). + if nproc is True: + global ext_nproc_scorer + ext_scoring_func = ext_nproc_scorer + ## initialize # the set containing selected prefixes prefix_set_prev = {'\t': 1.0} @@ -193,8 +202,8 @@ def ctc_beam_search_decoder(probs_seq, def ctc_beam_search_decoder_nproc(probs_split, beam_size, vocabulary, - ext_scoring_func=None, blank_id=0, + ext_scoring_func=None, num_processes=None): ''' Beam search decoder using multiple processes. 
@@ -202,7 +211,6 @@ def ctc_beam_search_decoder_nproc(probs_split, :param probs_seq: 3-D list with length batch_size, each element is a 2-D list of probabilities can be used by ctc_beam_search_decoder. - :type probs_seq: 3-D list :param beam_size: Width for beam search. :type beam_size: int @@ -227,10 +235,15 @@ def ctc_beam_search_decoder_nproc(probs_split, if not num_processes > 0: raise ValueError("Number of processes must be positive!") + # use global variable to pass the externnal scorer to beam search decoder + global ext_nproc_scorer + ext_nproc_scorer = ext_scoring_func + nproc = True + pool = multiprocessing.Pool(processes=num_processes) results = [] for i, probs_list in enumerate(probs_split): - args = (probs_list, beam_size, vocabulary, ext_scoring_func, blank_id) + args = (probs_list, beam_size, vocabulary, blank_id, None, nproc) results.append(pool.apply_async(ctc_beam_search_decoder, args)) pool.close() diff --git a/infer.py b/infer.py index 0be89e617..0bae13122 100644 --- a/infer.py +++ b/infer.py @@ -9,6 +9,7 @@ import gzip from audio_data_utils import DataGenerator from model import deep_speech2 from decoder import * +import kenlm from error_rate import wer parser = argparse.ArgumentParser( @@ -176,7 +177,7 @@ def infer(): probs_seq=probs, vocabulary=vocab_list, beam_size=args.beam_size, - ext_scoring_func=ext_scorer.evaluate, + ext_scoring_func=ext_scorer, blank_id=len(vocab_list)) print("\nTarget Transcription:\t%s" % target_transcription) @@ -196,9 +197,9 @@ def infer(): probs_split=probs_split, vocabulary=vocab_list, beam_size=args.beam_size, - #ext_scoring_func=ext_scorer.evaluate, - ext_scoring_func=None, - blank_id=len(vocab_list)) + ext_scoring_func=ext_scorer, + blank_id=len(vocab_list), + num_processes=1) for i, beam_search_result in enumerate(beam_search_nproc_results): target_transcription = ''.join( [vocab_list[index] for index in infer_data[i][1]]) diff --git a/tune.py b/tune.py new file mode 100644 index 000000000..3eb826489 --- /dev/null +++ b/tune.py @@ -0,0 +1,234 @@ +""" + Tune parameters for beam search decoder in Deep Speech 2. +""" + +import paddle.v2 as paddle +import distutils.util +import argparse +import gzip +from audio_data_utils import DataGenerator +from model import deep_speech2 +from decoder import * +from error_rate import wer + +parser = argparse.ArgumentParser( + description='Parameters tuning script for ctc beam search decoder in Deep Speech 2.' +) +parser.add_argument( + "--num_samples", + default=100, + type=int, + help="Number of samples for parameters tuning. (default: %(default)s)") +parser.add_argument( + "--num_conv_layers", + default=2, + type=int, + help="Convolution layer number. (default: %(default)s)") +parser.add_argument( + "--num_rnn_layers", + default=3, + type=int, + help="RNN layer number. (default: %(default)s)") +parser.add_argument( + "--rnn_layer_size", + default=512, + type=int, + help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gpu", + default=True, + type=distutils.util.strtobool, + help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--normalizer_manifest_path", + default='data/manifest.libri.train-clean-100', + type=str, + help="Manifest path for normalizer. (default: %(default)s)") +parser.add_argument( + "--decode_manifest_path", + default='data/manifest.libri.test-100sample', + type=str, + help="Manifest path for decoding. (default: %(default)s)") +parser.add_argument( + "--model_filepath", + default='./params.tar.gz', + type=str, + help="Model filepath. 
(default: %(default)s)") +parser.add_argument( + "--vocab_filepath", + default='data/eng_vocab.txt', + type=str, + help="Vocabulary filepath. (default: %(default)s)") +parser.add_argument( + "--decode_method", + default='beam_search_nproc', + type=str, + help="Method for decoding, beam_search or beam_search_nproc. (default: %(default)s)" +) +parser.add_argument( + "--beam_size", + default=500, + type=int, + help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--num_results_per_sample", + default=1, + type=int, + help="Number of outputs per sample in beam search. (default: %(default)d)") +parser.add_argument( + "--language_model_path", + default="./data/1Billion.klm", + type=str, + help="Path for language model. (default: %(default)s)") +parser.add_argument( + "--alpha_from", + default=0.0, + type=float, + help="Where alpha starts from, <= alpha_to. (default: %(default)f)") +parser.add_argument( + "--alpha_stride", + default=0.001, + type=float, + help="Step length for varying alpha. (default: %(default)f)") +parser.add_argument( + "--alpha_to", + default=0.01, + type=float, + help="Where alpha ends with, >= alpha_from. (default: %(default)f)") +parser.add_argument( + "--beta_from", + default=0.0, + type=float, + help="Where beta starts from, <= beta_to. (default: %(default)f)") +parser.add_argument( + "--beta_stride", + default=0.01, + type=float, + help="Step length for varying beta. (default: %(default)f)") +parser.add_argument( + "--beta_to", + default=0.0, + type=float, + help="Where beta ends with, >= beta_from. (default: %(default)f)") +args = parser.parse_args() + + +def tune(): + """ + Tune parameters alpha and beta on one minibatch. + """ + + if not args.alpha_from <= args.alpha_to: + raise ValueError("alpha_from <= alpha_to doesn't satisfy!") + if not args.alpha_stride > 0: + raise ValueError("alpha_stride shouldn't be negative!") + + if not args.beta_from <= args.beta_to: + raise ValueError("beta_from <= beta_to doesn't satisfy!") + if not args.beta_stride > 0: + raise ValueError("beta_stride shouldn't be negative!") + + # initialize data generator + data_generator = DataGenerator( + vocab_filepath=args.vocab_filepath, + normalizer_manifest_path=args.normalizer_manifest_path, + normalizer_num_samples=200, + max_duration=20.0, + min_duration=0.0, + stride_ms=10, + window_ms=20) + + # create network config + dict_size = data_generator.vocabulary_size() + vocab_list = data_generator.vocabulary_list() + audio_data = paddle.layer.data( + name="audio_spectrogram", + height=161, + width=2000, + type=paddle.data_type.dense_vector(322000)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(dict_size)) + output_probs = deep_speech2( + audio_data=audio_data, + text_data=text_data, + dict_size=dict_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size, + is_inference=True) + + # load parameters + parameters = paddle.parameters.Parameters.from_tar( + gzip.open(args.model_filepath)) + + # prepare infer data + feeding = data_generator.data_name_feeding() + test_batch_reader = data_generator.batch_reader_creator( + manifest_path=args.decode_manifest_path, + batch_size=args.num_samples, + padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=False) + infer_data = test_batch_reader().next() + + # run inference + infer_results = paddle.infer( + output_layer=output_probs, parameters=parameters, input=infer_data) + num_steps = 
len(infer_results) / len(infer_data) + probs_split = [ + infer_results[i * num_steps:(i + 1) * num_steps] + for i in xrange(0, len(infer_data)) + ] + + cand_alpha = np.arange(args.alpha_from, args.alpha_to + args.alpha_stride, + args.alpha_stride) + cand_beta = np.arange(args.beta_from, args.beta_to + args.beta_stride, + args.beta_stride) + params_grid = [(alpha, beta) for alpha in cand_alpha for beta in cand_beta] + ## tune parameters in loop + for (alpha, beta) in params_grid: + wer_sum, wer_counter = 0, 0 + ext_scorer = Scorer(alpha, beta, args.language_model_path) + # beam search decode + if args.decode_method == "beam_search": + for i, probs in enumerate(probs_split): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + beam_search_result = ctc_beam_search_decoder( + probs_seq=probs, + vocabulary=vocab_list, + beam_size=args.beam_size, + ext_scoring_func=ext_scorer, + blank_id=len(vocab_list)) + wer_sum += wer(target_transcription, beam_search_result[0][1]) + wer_counter += 1 + # beam search using multiple processes + elif args.decode_method == "beam_search_nproc": + beam_search_nproc_results = ctc_beam_search_decoder_nproc( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=args.beam_size, + ext_scoring_func=ext_scorer, + blank_id=len(vocab_list), + num_processes=1) + for i, beam_search_result in enumerate(beam_search_nproc_results): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + wer_sum += wer(target_transcription, beam_search_result[0][1]) + wer_counter += 1 + else: + raise ValueError("Decoding method [%s] is not supported." % method) + + print("alpha = %f\tbeta = %f\tWER = %f" % + (alpha, beta, wer_sum / wer_counter)) + + +def main(): + paddle.init(use_gpu=args.use_gpu, trainer_count=1) + tune() + + +if __name__ == '__main__': + main() From a633eb9cc6d81ad9e1d9615be281b5678e256faa Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 13 Jun 2017 15:28:43 +0800 Subject: [PATCH 034/335] change two arguments --- infer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/infer.py b/infer.py index 0bae13122..bb9dfa0a6 100644 --- a/infer.py +++ b/infer.py @@ -198,8 +198,7 @@ def infer(): vocabulary=vocab_list, beam_size=args.beam_size, ext_scoring_func=ext_scorer, - blank_id=len(vocab_list), - num_processes=1) + blank_id=len(vocab_list)) for i, beam_search_result in enumerate(beam_search_nproc_results): target_transcription = ''.join( [vocab_list[index] for index in infer_data[i][1]]) From b07ee84a1d613511193a486363937750880ea6fa Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 13 Jun 2017 23:16:07 +0800 Subject: [PATCH 035/335] Add function, class and module docs for data parts in DS2. 
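Among the documentation added below is the JSON configuration format for the augmentation pipeline. As a concrete illustration (the shell invocation is only a sketch, with placeholder values), the `--augmentation_config` flag added to `train.py` earlier in this series takes a JSON list of augmentor specs; with the volume perturbation augmentor being the only one implemented so far, a minimal configuration looks like:

```
python train.py \
    --augmentation_config '[{"type": "volume",
                             "params": {"min_gain_dBFS": -15,
                                        "max_gain_dBFS": 15},
                             "prob": 0.5}]'
```

Each entry names an augmentor `type`, its constructor `params`, and the probability `prob` with which it is applied to a training sample.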
--- compute_mean_std.py | 3 +- data_utils/audio.py | 232 ++++++++++++++++++--- data_utils/augmentor/augmentation.py | 60 +++++- data_utils/augmentor/base.py | 16 ++ data_utils/augmentor/volume_perturb.py | 40 ++++ data_utils/augmentor/volumn_perturb.py | 17 -- data_utils/data.py | 166 +++++++-------- data_utils/featurizer/audio_featurizer.py | 38 +++- data_utils/featurizer/speech_featurizer.py | 55 ++++- data_utils/featurizer/text_featurizer.py | 36 +++- data_utils/normalizer.py | 40 +++- data_utils/speech.py | 75 +++++++ data_utils/utils.py | 17 +- datasets/librispeech/librispeech.py | 16 +- decoder.py | 9 +- infer.py | 5 +- model.py | 9 +- train.py | 7 +- 18 files changed, 662 insertions(+), 179 deletions(-) create mode 100755 data_utils/augmentor/volume_perturb.py delete mode 100755 data_utils/augmentor/volumn_perturb.py create mode 100755 data_utils/speech.py diff --git a/compute_mean_std.py b/compute_mean_std.py index b3015df73..9c301c93f 100755 --- a/compute_mean_std.py +++ b/compute_mean_std.py @@ -1,3 +1,4 @@ +"""Compute mean and std for feature normalizer, and save to file.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -17,7 +18,7 @@ parser.add_argument( "(default: %(default)s)") parser.add_argument( "--num_samples", - default=500, + default=2000, type=int, help="Number of samples for computing mean and stddev. " "(default: %(default)s)") diff --git a/data_utils/audio.py b/data_utils/audio.py index 46b241201..916c8ac1a 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -1,3 +1,8 @@ +"""Contains the audio segment class.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import numpy as np import io import soundfile @@ -5,64 +10,243 @@ import soundfile class AudioSegment(object): """Monaural audio segment abstraction. + + :param samples: Audio samples [num_samples x num_channels]. + :type samples: ndarray.float32 + :param sample_rate: Audio sample rate. + :type sample_rate: int + :raises TypeError: If the sample data type is not float or int. """ def __init__(self, samples, sample_rate): - if not samples.dtype == np.float32: - raise ValueError("Sample data type of [%s] is not supported.") - self._samples = samples + """Create audio segment from samples. + + Samples are convert float32 internally, with int scaled to [-1, 1]. + """ + self._samples = self._convert_samples_to_float32(samples) self._sample_rate = sample_rate if self._samples.ndim >= 2: self._samples = np.mean(self._samples, 1) + def __eq__(self, other): + """Return whether two objects are equal.""" + if type(other) is not type(self): + return False + if self._sample_rate != other._sample_rate: + return False + if self._samples.shape != other._samples.shape: + return False + if np.any(self.samples != other._samples): + return False + return True + + def __ne__(self, other): + """Return whether two objects are unequal.""" + return not self.__eq__(other) + + def __str__(self): + """Return human-readable representation of segment.""" + return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " + "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate, + self.duration, self.rms_db)) + @classmethod - def from_file(cls, filepath): - samples, sample_rate = soundfile.read(filepath, dtype='float32') + def from_file(cls, file): + """Create audio segment from audio file. + + :param filepath: Filepath or file object to audio file. 
+ :type filepath: basestring|file + :return: Audio segment instance. + :rtype: AudioSegment + """ + samples, sample_rate = soundfile.read(file, dtype='float32') return cls(samples, sample_rate) @classmethod def from_bytes(cls, bytes): + """Create audio segment from a byte string containing audio samples. + + :param bytes: Byte string containing audio samples. + :type bytes: str + :return: Audio segment instance. + :rtype: AudioSegment + """ samples, sample_rate = soundfile.read( io.BytesIO(bytes), dtype='float32') return cls(samples, sample_rate) + def to_wav_file(self, filepath, dtype='float32'): + """Save audio segment to disk as wav file. + + :param filepath: WAV filepath or file object to save the + audio segment. + :type filepath: basestring|file + :param dtype: Subtype for audio file. Options: 'int16', 'int32', + 'float32', 'float64'. Default is 'float32'. + :type dtype: str + :raises TypeError: If dtype is not supported. + """ + samples = self._convert_samples_from_float32(self._samples, dtype) + subtype_map = { + 'int16': 'PCM_16', + 'int32': 'PCM_32', + 'float32': 'FLOAT', + 'float64': 'DOUBLE' + } + soundfile.write( + filepath, + samples, + self._sample_rate, + format='WAV', + subtype=subtype_map[dtype]) + + def to_bytes(self, dtype='float32'): + """Create a byte string containing the audio content. + + :param dtype: Data type for export samples. Options: 'int16', 'int32', + 'float32', 'float64'. Default is 'float32'. + :type dtype: str + :return: Byte string containing audio content. + :rtype: str + """ + samples = self._convert_samples_from_float32(self._samples, dtype) + return samples.tostring() + def apply_gain(self, gain): - self.samples *= 10.**(gain / 20.) + """Apply gain in decibels to samples. + + Note that this is an in-place transformation. + + :param gain: Gain in decibels to apply to samples. + :type gain: float + """ + self._samples *= 10.**(gain / 20.) + + def change_speed(self, speed_rate): + """Change the audio speed by linear interpolation. + + Note that this is an in-place transformation. + + :param speed_rate: Rate of speed change: + speed_rate > 1.0, speed up the audio; + speed_rate = 1.0, unchanged; + speed_rate < 1.0, slow down the audio; + speed_rate <= 0.0, not allowed, raise ValueError. + :type speed_rate: float + :raises ValueError: If speed_rate <= 0.0. + """ + if speed_rate <= 0: + raise ValueError("speed_rate should be greater than zero.") + old_length = self._samples.shape[0] + new_length = int(old_length / speed_rate) + old_indices = np.arange(old_length) + new_indices = np.linspace(start=0, stop=old_length, num=new_length) + self._samples = np.interp(new_indices, old_indices, self._samples) + + def normalize(self, target_sample_rate): + raise NotImplementedError() def resample(self, target_sample_rate): raise NotImplementedError() - def change_speed(self, rate): + def pad_silence(self, duration, sides='both'): + raise NotImplementedError() + + def subsegment(self, start_sec=None, end_sec=None): + raise NotImplementedError() + + def convolve(self, filter, allow_resample=False): + raise NotImplementedError() + + def convolve_and_normalize(self, filter, allow_resample=False): raise NotImplementedError() @property def samples(self): + """Return audio samples. + + :return: Audio samples. + :rtype: ndarray + """ return self._samples.copy() @property def sample_rate(self): + """Return audio sample rate. + + :return: Audio sample rate. 
+ :rtype: int + """ return self._sample_rate @property - def duration(self): - return self._samples.shape[0] / float(self._sample_rate) - + def num_samples(self): + """Return number of samples. -class SpeechSegment(AudioSegment): - def __init__(self, samples, sample_rate, transcript): - AudioSegment.__init__(self, samples, sample_rate) - self._transcript = transcript + :return: Number of samples. + :rtype: int + """ + return self._samples.shape(0) - @classmethod - def from_file(cls, filepath, transcript): - audio = AudioSegment.from_file(filepath) - return cls(audio.samples, audio.sample_rate, transcript) + @property + def duration(self): + """Return audio duration. - @classmethod - def from_bytes(cls, bytes, transcript): - audio = AudioSegment.from_bytes(bytes) - return cls(audio.samples, audio.sample_rate, transcript) + :return: Audio duration in seconds. + :rtype: float + """ + return self._samples.shape[0] / float(self._sample_rate) @property - def transcript(self): - return self._transcript + def rms_db(self): + """Return root mean square energy of the audio in decibels. + + :return: Root mean square energy in decibels. + :rtype: float + """ + # square root => multiply by 10 instead of 20 for dBs + mean_square = np.mean(self._samples**2) + return 10 * np.log10(mean_square) + + def _convert_samples_to_float32(self, samples): + """Convert sample type to float32. + + Audio sample type is usually integer or float-point. + Integers will be scaled to [-1, 1] in float32. + """ + float32_samples = samples.astype('float32') + if samples.dtype in np.sctypes['int']: + bits = np.iinfo(samples.dtype).bits + float32_samples *= (1. / 2**(bits - 1)) + elif samples.dtype in np.sctypes['float']: + pass + else: + raise TypeError("Unsupported sample type: %s." % samples.dtype) + return float32_samples + + def _convert_samples_from_float32(self, samples, dtype): + """Convert sample type from float32 to dtype. + + Audio sample type is usually integer or float-point. For integer + type, float32 will be rescaled from [-1, 1] to the maximum range + supported by the integer type. + + This is for writing a audio file. + """ + dtype = np.dtype(dtype) + output_samples = samples.copy() + if dtype in np.sctypes['int']: + bits = np.iinfo(dtype).bits + output_samples *= (2**(bits - 1) / 1.) + min_val = np.iinfo(dtype).min + max_val = np.iinfo(dtype).max + output_samples[output_samples > max_val] = max_val + output_samples[output_samples < min_val] = min_val + elif samples.dtype in np.sctypes['float']: + min_val = np.finfo(dtype).min + max_val = np.finfo(dtype).max + output_samples[output_samples > max_val] = max_val + output_samples[output_samples < min_val] = min_val + else: + raise TypeError("Unsupported sample type: %s." 
% samples.dtype) + return output_samples.astype(dtype) diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index 3a1426a1f..abe1a0ec8 100755 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -1,38 +1,80 @@ +"""Contains the data augmentation pipeline.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import random -from data_utils.augmentor.volumn_perturb import VolumnPerturbAugmentor +from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor class AugmentationPipeline(object): + """Build a pre-processing pipeline with various augmentation models.Such a + data augmentation pipeline is oftern leveraged to augment the training + samples to make the model invariant to certain types of perturbations in the + real world, improving model's generalization ability. + + The pipeline is built according the the augmentation configuration in json + string, e.g. + + .. code-block:: + + '[{"type": "volume", + "params": {"min_gain_dBFS": -15, + "max_gain_dBFS": 15}, + "prob": 0.5}, + {"type": "speed", + "params": {"min_speed_rate": 0.8, + "max_speed_rate": 1.2}, + "prob": 0.5} + ]' + + This augmentation configuration inserts two augmentation models + into the pipeline, with one is VolumePerturbAugmentor and the other + SpeedPerturbAugmentor. "prob" indicates the probability of the current + augmentor to take effect. + + :param augmentation_config: Augmentation configuration in json string. + :type augmentation_config: str + :param random_seed: Random seed. + :type random_seed: int + :raises ValueError: If the augmentation json config is in incorrect format". + """ + def __init__(self, augmentation_config, random_seed=0): self._rng = random.Random(random_seed) self._augmentors, self._rates = self._parse_pipeline_from( augmentation_config) def transform_audio(self, audio_segment): + """Run the pre-processing pipeline for data augmentation. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to process. + :type audio_segment: AudioSegmenet|SpeechSegment + """ for augmentor, rate in zip(self._augmentors, self._rates): if self._rng.uniform(0., 1.) <= rate: augmentor.transform_audio(audio_segment) def _parse_pipeline_from(self, config_json): + """Parse the config json to build a augmentation pipelien.""" try: configs = json.loads(config_json) + augmentors = [ + self._get_augmentor(config["type"], config["params"]) + for config in configs + ] + rates = [config["prob"] for config in configs] except Exception as e: - raise ValueError("Augmentation config json format error: " + raise ValueError("Failed to parse the augmentation config json: " "%s" % str(e)) - augmentors = [ - self._get_augmentor(config["type"], config["params"]) - for config in configs - ] - rates = [config["rate"] for config in configs] return augmentors, rates def _get_augmentor(self, augmentor_type, params): - if augmentor_type == "volumn": - return VolumnPerturbAugmentor(self._rng, **params) + """Return an augmentation model by the type name, and pass in params.""" + if augmentor_type == "volume": + return VolumePerturbAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." 
% augmentor_type) diff --git a/data_utils/augmentor/base.py b/data_utils/augmentor/base.py index e801b9b18..a323165aa 100755 --- a/data_utils/augmentor/base.py +++ b/data_utils/augmentor/base.py @@ -1,3 +1,4 @@ +"""Contains the abstract base class for augmentation models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -6,6 +7,11 @@ from abc import ABCMeta, abstractmethod class AugmentorBase(object): + """Abstract base class for augmentation model (augmentor) class. + All augmentor classes should inherit from this class, and implement the + following abstract methods. + """ + __metaclass__ = ABCMeta @abstractmethod @@ -14,4 +20,14 @@ class AugmentorBase(object): @abstractmethod def transform_audio(self, audio_segment): + """Adds various effects to the input audio segment. Such effects + will augment the training data to make the model invariant to certain + types of perturbations in the real world, improving model's + generalization ability. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. + :type audio_segment: AudioSegmenet|SpeechSegment + """ pass diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py new file mode 100755 index 000000000..a5a9f6cad --- /dev/null +++ b/data_utils/augmentor/volume_perturb.py @@ -0,0 +1,40 @@ +"""Contains the volume perturb augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class VolumePerturbAugmentor(AugmentorBase): + """Augmentation model for adding random volume perturbation. + + This is used for multi-loudness training of PCEN. See + + https://arxiv.org/pdf/1607.05666v1.pdf + + for more details. + + :param rng: Random generator object. + :type rng: random.Random + :param min_gain_dBFS: Minimal gain in dBFS. + :type min_gain_dBFS: float + :param max_gain_dBFS: Maximal gain in dBFS. + :type max_gain_dBFS: float + """ + + def __init__(self, rng, min_gain_dBFS, max_gain_dBFS): + self._min_gain_dBFS = min_gain_dBFS + self._max_gain_dBFS = max_gain_dBFS + self._rng = rng + + def transform_audio(self, audio_segment): + """Change audio loadness. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. 
+ :type audio_segment: AudioSegmenet|SpeechSegment + """ + gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS) + audio_segment.apply_gain(gain) diff --git a/data_utils/augmentor/volumn_perturb.py b/data_utils/augmentor/volumn_perturb.py deleted file mode 100755 index dd1ba53a7..000000000 --- a/data_utils/augmentor/volumn_perturb.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random -from data_utils.augmentor.base import AugmentorBase - - -class VolumnPerturbAugmentor(AugmentorBase): - def __init__(self, rng, min_gain_dBFS, max_gain_dBFS): - self._min_gain_dBFS = min_gain_dBFS - self._max_gain_dBFS = max_gain_dBFS - self._rng = rng - - def transform_audio(self, audio_segment): - gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS) - audio_segment.apply_gain(gain) diff --git a/data_utils/data.py b/data_utils/data.py index 630007932..48e03fe85 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -1,8 +1,6 @@ +"""Contains data generator for orgnaizing various audio data preprocessing +pipeline and offering data reader interface of PaddlePaddle requirements. """ - Providing basic audio data preprocessing pipeline, and offering - both instance-level and batch-level data reader interfaces. -""" - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -13,42 +11,41 @@ import paddle.v2 as paddle from data_utils import utils from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.speech_featurizer import SpeechFeaturizer -from data_utils.audio import SpeechSegment +from data_utils.speech import SpeechSegment from data_utils.normalizer import FeatureNormalizer class DataGenerator(object): """ DataGenerator provides basic audio data preprocessing pipeline, and offers - both instance-level and batch-level data reader interfaces. - Normalized FFT are used as audio features here. + data reader interfaces of PaddlePaddle requirements. - :param vocab_filepath: Vocabulary file path for indexing tokenized - transcriptions. + :param vocab_filepath: Vocabulary filepath for indexing tokenized + transcripts. :type vocab_filepath: basestring - :param normalizer_manifest_path: Manifest filepath for collecting feature - normalization statistics, e.g. mean, std. - :type normalizer_manifest_path: basestring - :param normalizer_num_samples: Number of instances sampled for collecting - feature normalization statistics. - Default is 100. - :type normalizer_num_samples: int - :param max_duration: Audio clips with duration (in seconds) greater than - this will be discarded. Default is 20.0. + :param mean_std_filepath: File containing the pre-computed mean and stddev. + :type mean_std_filepath: None|basestring + :param augmentation_config: Augmentation configuration in json string. + Details see AugmentationPipeline.__doc__. + :type augmentation_config: str + :param max_duration: Audio with duration (in seconds) greater than + this will be discarded. :type max_duration: float - :param min_duration: Audio clips with duration (in seconds) smaller than - this will be discarded. Default is 0.0. + :param min_duration: Audio with duration (in seconds) smaller than + this will be discarded. :type min_duration: float :param stride_ms: Striding size (in milliseconds) for generating frames. - Default is 10.0. :type stride_ms: float - :param window_ms: Window size (in milliseconds) for frames. Default is 20.0. 
+ :param window_ms: Window size (in milliseconds) for generating frames. :type window_ms: float - :param max_frequency: Maximun frequency for FFT features. FFT features of - frequency larger than this will be discarded. - If set None, all features will be kept. - Default is None. - :type max_frequency: float + :param max_freq: Used when specgram_type is 'linear', only FFT bins + corresponding to frequencies between [0, max_freq] are + returned. + :types max_freq: None|float + :param specgram_type: Specgram feature type. Options: 'linear'. + :type specgram_type: str + :param random_seed: Random seed. + :type random_seed: int """ def __init__(self, @@ -60,6 +57,7 @@ class DataGenerator(object): stride_ms=10.0, window_ms=20.0, max_freq=None, + specgram_type='linear', random_seed=0): self._max_duration = max_duration self._min_duration = min_duration @@ -68,46 +66,49 @@ class DataGenerator(object): augmentation_config=augmentation_config, random_seed=random_seed) self._speech_featurizer = SpeechFeaturizer( vocab_filepath=vocab_filepath, + specgram_type=specgram_type, stride_ms=stride_ms, window_ms=window_ms, - max_freq=max_freq, - random_seed=random_seed) + max_freq=max_freq) self._rng = random.Random(random_seed) self._epoch = 0 def batch_reader_creator(self, manifest_path, batch_size, + min_batch_size=1, padding_to=-1, flatten=False, sortagrad=False, batch_shuffle=False): """ - Batch data reader creator for audio data. Creat a callable function to - produce batches of data. + Batch data reader creator for audio data. Return a callable generator + function to produce batches of data. - Audio features will be padded with zeros to make each instance in the - batch to share the same audio feature shape. + Audio features within one batch will be padded with zeros to have the + same shape, or a user-defined shape. - :param manifest_path: Filepath of manifest for audio clip files. + :param manifest_path: Filepath of manifest for audio files. :type manifest_path: basestring - :param batch_size: Instance number in a batch. + :param batch_size: Number of instances in a batch. :type batch_size: int - :param padding_to: If set -1, the maximun column numbers in the batch - will be used as the target size for padding. - Otherwise, `padding_to` will be the target size. - Default is -1. + :param min_batch_size: Any batch with batch size smaller than this will + be discarded. (To be deprecated in the future.) + :type min_batch_size: int + :param padding_to: If set -1, the maximun shape in the batch + will be used as the target shape for padding. + Otherwise, `padding_to` will be the target shape. :type padding_to: int - :param flatten: If set True, audio data will be flatten to be a 1-dim - ndarray. Otherwise, 2-dim ndarray. Default is False. + :param flatten: If set True, audio features will be flatten to 1darray. :type flatten: bool - :param sortagrad: Sort the audio clips by duration in the first epoc - if set True. + :param sortagrad: If set True, sort the instances by audio duration + in the first epoch for speed up training. :type sortagrad: bool - :param batch_shuffle: Shuffle the audio clips if set True. It is - not a thorough instance-wise shuffle, but a - specific batch-wise shuffle. For more details, - please see `_batch_shuffle` function. + :param batch_shuffle: If set True, instances are batch-wise shuffled. + For more details, please see + ``_batch_shuffle.__doc__``. + If sortagrad is True, batch_shuffle is disabled + for the first epoch. 
:type batch_shuffle: bool :return: Batch reader function, producing batches of data when called. :rtype: callable @@ -132,7 +133,7 @@ class DataGenerator(object): if len(batch) == batch_size: yield self._padding_batch(batch, padding_to, flatten) batch = [] - if len(batch) > 0: + if len(batch) >= min_batch_size: yield self._padding_batch(batch, padding_to, flatten) self._epoch += 1 @@ -140,20 +141,33 @@ class DataGenerator(object): @property def feeding(self): - """Returns data_reader's feeding dict.""" + """Returns data reader's feeding dict. + + :return: Data feeding dict. + :rtype: dict + """ return {"audio_spectrogram": 0, "transcript_text": 1} @property def vocab_size(self): - """Returns vocabulary size.""" + """Return the vocabulary size. + + :return: Vocabulary size. + :rtype: int + """ return self._speech_featurizer.vocab_size @property def vocab_list(self): - """Returns vocabulary list.""" + """Return the vocabulary in list. + + :return: Vocabulary in list. + :rtype: list + """ return self._speech_featurizer.vocab_list def _process_utterance(self, filename, transcript): + """Load, augment, featurize and normalize for speech data.""" speech_segment = SpeechSegment.from_file(filename, transcript) self._augmentation_pipeline.transform_audio(speech_segment) specgram, text_ids = self._speech_featurizer.featurize(speech_segment) @@ -162,16 +176,11 @@ class DataGenerator(object): def _instance_reader_creator(self, manifest): """ - Instance reader creator for audio data. Creat a callable function to - produce instances of data. + Instance reader creator. Create a callable function to produce + instances of data. - Instance: a tuple of a numpy ndarray of audio spectrogram and a list of - tokenized and indexed transcription text. - - :param manifest: Filepath of manifest for audio clip files. - :type manifest: basestring - :return: Data reader function. - :rtype: callable + Instance: a tuple of ndarray of audio spectrogram and a list of + token indices for transcript. """ def reader(): @@ -183,24 +192,22 @@ class DataGenerator(object): def _padding_batch(self, batch, padding_to=-1, flatten=False): """ - Padding audio part of features (only in the time axis -- column axis) - with zeros, to make each instance in the batch share the same - audio feature shape. + Padding audio features with zeros to make them have the same shape (or + a user-defined shape) within one bach. - If `padding_to` is set -1, the maximun column numbers in the batch will - be used as the target size. Otherwise, `padding_to` will be the target - size. Default is -1. + If ``padding_to`` is -1, the maximun shape in the batch will be used + as the target shape for padding. Otherwise, `padding_to` will be the + target shape (only refers to the second axis). - If `flatten` is set True, audio data will be flatten to be a 1-dim - ndarray. Default is False. + If `flatten` is True, features will be flatten to 1darray. 
""" new_batch = [] # get target shape max_length = max([audio.shape[1] for audio, text in batch]) if padding_to != -1: if padding_to < max_length: - raise ValueError("If padding_to is not -1, it should be greater" - " or equal to the original instance length.") + raise ValueError("If padding_to is not -1, it should be larger " + "than any instance's shape in the batch") max_length = padding_to # padding for audio, text in batch: @@ -212,28 +219,21 @@ class DataGenerator(object): return new_batch def _batch_shuffle(self, manifest, batch_size): - """ - The instances have different lengths and they cannot be - combined into a single matrix multiplication. It usually - sorts the training examples by length and combines only - similarly-sized instances into minibatches, pads with - silence when necessary so that all instances in a batch - have the same length. This batch shuffle fuction is used - to make similarly-sized instances into minibatches and - make a batch-wise shuffle. + """Put similarly-sized instances into minibatches for better efficiency + and make a batch-wise shuffle. 1. Sort the audio clips by duration. 2. Generate a random number `k`, k in [0, batch_size). - 3. Randomly remove `k` instances in order to make different mini-batches, - then make minibatches and each minibatch size is batch_size. + 3. Randomly shift `k` instances in order to create different batches + for different epochs. Create minibatches. 4. Shuffle the minibatches. - :param manifest: manifest file. + :param manifest: Manifest contents. List of dict. :type manifest: list :param batch_size: Batch size. This size is also used for generate a random number for batch shuffle. :type batch_size: int - :return: batch shuffled mainifest. + :return: Batch shuffled mainifest. :rtype: list """ manifest.sort(key=lambda x: x["duration"]) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 5d9c68836..9f9d4e505 100755 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -1,30 +1,54 @@ +"""Contains the audio featurizer class.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -import random from data_utils import utils from data_utils.audio import AudioSegment class AudioFeaturizer(object): + """Audio featurizer, for extracting features from audio contents of + AudioSegment or SpeechSegment. + + Currently, it only supports feature type of linear spectrogram. + + :param specgram_type: Specgram feature type. Options: 'linear'. + :type specgram_type: str + :param stride_ms: Striding size (in milliseconds) for generating frames. + :type stride_ms: float + :param window_ms: Window size (in milliseconds) for generating frames. + :type window_ms: float + :param max_freq: Used when specgram_type is 'linear', only FFT bins + corresponding to frequencies between [0, max_freq] are + returned. + :types max_freq: None|float + """ + def __init__(self, specgram_type='linear', stride_ms=10.0, window_ms=20.0, - max_freq=None, - random_seed=0): + max_freq=None): self._specgram_type = specgram_type self._stride_ms = stride_ms self._window_ms = window_ms self._max_freq = max_freq def featurize(self, audio_segment): + """Extract audio features from AudioSegment or SpeechSegment. + + :param audio_segment: Audio/speech segment to extract features from. + :type audio_segment: AudioSegment|SpeechSegment + :return: Spectrogram audio feature in 2darray. 
+ :rtype: ndarray + """ return self._compute_specgram(audio_segment.samples, audio_segment.sample_rate) def _compute_specgram(self, samples, sample_rate): + """Extract various audio features.""" if self._specgram_type == 'linear': return self._compute_linear_specgram( samples, sample_rate, self._stride_ms, self._window_ms, @@ -40,9 +64,7 @@ class AudioFeaturizer(object): window_ms=20.0, max_freq=None, eps=1e-14): - """Laod audio data and calculate the log of spectrogram by FFT. - Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech - """ + """Compute the linear spectrogram from FFT energy.""" if max_freq is None: max_freq = sample_rate / 2 if max_freq > sample_rate / 2: @@ -62,9 +84,7 @@ class AudioFeaturizer(object): return np.log(specgram[:ind, :] + eps) def _specgram_real(self, samples, window_size, stride_size, sample_rate): - """Compute the spectrogram by FFT for a discrete real signal. - Refer to utils.py in https://github.com/baidu-research/ba-dls-deepspeech - """ + """Compute the spectrogram for samples from a real signal.""" # extract strided windows truncate_size = (len(samples) - window_size) % stride_size samples = samples[:len(samples) - truncate_size] diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py index 06af7a026..770204559 100755 --- a/data_utils/featurizer/speech_featurizer.py +++ b/data_utils/featurizer/speech_featurizer.py @@ -1,3 +1,4 @@ +"""Contains the speech featurizer class.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -7,26 +8,70 @@ from data_utils.featurizer.text_featurizer import TextFeaturizer class SpeechFeaturizer(object): + """Speech featurizer, for extracting features from both audio and transcript + contents of SpeechSegment. + + Currently, for audio parts, it only supports feature type of linear + spectrogram; for transcript parts, it only supports char-level tokenizing + and conversion into a list of token indices. Note that the token indexing + order follows the given vocabulary file. + + :param vocab_filepath: Filepath to load vocabulary for token indices + conversion. + :type specgram_type: basestring + :param specgram_type: Specgram feature type. Options: 'linear'. + :type specgram_type: str + :param stride_ms: Striding size (in milliseconds) for generating frames. + :type stride_ms: float + :param window_ms: Window size (in milliseconds) for generating frames. + :type window_ms: float + :param max_freq: Used when specgram_type is 'linear', only FFT bins + corresponding to frequencies between [0, max_freq] are + returned. + :types max_freq: None|float + """ + def __init__(self, vocab_filepath, specgram_type='linear', stride_ms=10.0, window_ms=20.0, - max_freq=None, - random_seed=0): - self._audio_featurizer = AudioFeaturizer( - specgram_type, stride_ms, window_ms, max_freq, random_seed) + max_freq=None): + self._audio_featurizer = AudioFeaturizer(specgram_type, stride_ms, + window_ms, max_freq) self._text_featurizer = TextFeaturizer(vocab_filepath) def featurize(self, speech_segment): + """Extract features for speech segment. + + 1. For audio parts, extract the audio features. + 2. For transcript parts, convert text string to a list of token indices + in char-level. + + :param audio_segment: Speech segment to extract features from. + :type audio_segment: SpeechSegment + :return: A tuple of 1) spectrogram audio feature in 2darray, 2) list of + char-level token indices. 
+ :rtype: tuple + """ audio_feature = self._audio_featurizer.featurize(speech_segment) - text_ids = self._text_featurizer.text2ids(speech_segment.transcript) + text_ids = self._text_featurizer.featurize(speech_segment.transcript) return audio_feature, text_ids @property def vocab_size(self): + """Return the vocabulary size. + + :return: Vocabulary size. + :rtype: int + """ return self._text_featurizer.vocab_size @property def vocab_list(self): + """Return the vocabulary in list. + + :return: Vocabulary in list. + :rtype: list + """ return self._text_featurizer.vocab_list diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py index 7e4b69d7b..4f9a49b59 100755 --- a/data_utils/featurizer/text_featurizer.py +++ b/data_utils/featurizer/text_featurizer.py @@ -1,3 +1,4 @@ +"""Contains the text featurizer class.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -6,26 +7,53 @@ import os class TextFeaturizer(object): + """Text featurizer, for processing or extracting features from text. + + Currently, it only supports char-level tokenizing and conversion into + a list of token indices. Note that the token indexing order follows the + given vocabulary file. + + :param vocab_filepath: Filepath to load vocabulary for token indices + conversion. + :type specgram_type: basestring + """ + def __init__(self, vocab_filepath): self._vocab_dict, self._vocab_list = self._load_vocabulary_from_file( vocab_filepath) - def text2ids(self, text): + def featurize(self, text): + """Convert text string to a list of token indices in char-level.Note + that the token indexing order follows the given vocabulary file. + + :param text: Text to process. + :type text: basestring + :return: List of char-level token indices. + :rtype: list + """ tokens = self._char_tokenize(text) return [self._vocab_dict[token] for token in tokens] - def ids2text(self, ids): - return ''.join([self._vocab_list[id] for id in ids]) - @property def vocab_size(self): + """Return the vocabulary size. + + :return: Vocabulary size. + :rtype: int + """ return len(self._vocab_list) @property def vocab_list(self): + """Return the vocabulary in list. + + :return: Vocabulary in list. + :rtype: list + """ return self._vocab_list def _char_tokenize(self, text): + """Character tokenizer.""" return list(text.strip()) def _load_vocabulary_from_file(self, vocab_filepath): diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py index 364600af8..c123d25d2 100755 --- a/data_utils/normalizer.py +++ b/data_utils/normalizer.py @@ -1,3 +1,4 @@ +"""Contains feature normalizers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -9,6 +10,28 @@ from data_utils.audio import AudioSegment class FeatureNormalizer(object): + """Feature normalizer. Normalize features to be of zero mean and unit + stddev. + + if mean_std_filepath is provided (not None), the normalizer will directly + initilize from the file. Otherwise, both manifest_path and featurize_func + should be given for on-the-fly mean and stddev computing. + + :param mean_std_filepath: File containing the pre-computed mean and stddev. + :type mean_std_filepath: None|basestring + :param manifest_path: Manifest of instances for computing mean and stddev. + :type meanifest_path: None|basestring + :param featurize_func: Function to extract features. It should be callable + with ``featurize_func(audio_segment)``. 
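To make the char-level featurization above concrete, here is a toy illustration; the vocabulary and text are invented, while the tokenize-then-lookup logic mirrors the `featurize` method:

```
# Toy illustration of char-level featurization: tokens map to indices whose
# order follows the vocabulary list.
vocab_list = ['a', 'b', 'c', ' ']
vocab_dict = {token: idx for idx, token in enumerate(vocab_list)}
text = 'abc cab'
token_ids = [vocab_dict[token] for token in list(text.strip())]
# token_ids == [0, 1, 2, 3, 2, 0, 1]
```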
+ :type featurize_func: None|callable + :param num_samples: Number of random samples for computing mean and stddev. + :type num_samples: int + :param random_seed: Random seed for sampling instances. + :type random_seed: int + :raises ValueError: If both mean_std_filepath and manifest_path + (or both mean_std_filepath and featurize_func) are None. + """ + def __init__(self, mean_std_filepath, manifest_path=None, @@ -25,18 +48,33 @@ class FeatureNormalizer(object): self._read_mean_std_from_file(mean_std_filepath) def apply(self, features, eps=1e-14): - """Normalize features to be of zero mean and unit stddev.""" + """Normalize features to be of zero mean and unit stddev. + + :param features: Input features to be normalized. + :type features: ndarray + :param eps: added to stddev to provide numerical stablibity. + :type eps: float + :return: Normalized features. + :rtype: ndarray + """ return (features - self._mean) / (self._std + eps) def write_to_file(self, filepath): + """Write the mean and stddev to the file. + + :param filepath: File to write mean and stddev. + :type filepath: basestring + """ np.savez(filepath, mean=self._mean, std=self._std) def _read_mean_std_from_file(self, filepath): + """Load mean and std from file.""" npzfile = np.load(filepath) self._mean = npzfile["mean"] self._std = npzfile["std"] def _compute_mean_std(self, manifest_path, featurize_func, num_samples): + """Compute mean and std from randomly sampled instances.""" manifest = utils.read_manifest(manifest_path) sampled_manifest = self._rng.sample(manifest, num_samples) features = [] diff --git a/data_utils/speech.py b/data_utils/speech.py new file mode 100755 index 000000000..48db595b4 --- /dev/null +++ b/data_utils/speech.py @@ -0,0 +1,75 @@ +"""Contains the speech segment class.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.audio import AudioSegment + + +class SpeechSegment(AudioSegment): + """Speech segment abstraction, a subclass of AudioSegment, + with an additional transcript. + + :param samples: Audio samples [num_samples x num_channels]. + :type samples: ndarray.float32 + :param sample_rate: Audio sample rate. + :type sample_rate: int + :param transcript: Transcript text for the speech. + :type transript: basestring + :raises TypeError: If the sample data type is not float or int. + """ + + def __init__(self, samples, sample_rate, transcript): + AudioSegment.__init__(self, samples, sample_rate) + self._transcript = transcript + + def __eq__(self, other): + """Return whether two objects are equal. + """ + if not AudioSegment.__eq__(self, other): + return False + if self._transcript != other._transcript: + return False + return True + + def __ne__(self, other): + """Return whether two objects are unequal.""" + return not self.__eq__(other) + + @classmethod + def from_file(cls, filepath, transcript): + """Create speech segment from audio file and corresponding transcript. + + :param filepath: Filepath or file object to audio file. + :type filepath: basestring|file + :param transcript: Transcript text for the speech. + :type transript: basestring + :return: Audio segment instance. + :rtype: AudioSegment + """ + audio = AudioSegment.from_file(filepath) + return cls(audio.samples, audio.sample_rate, transcript) + + @classmethod + def from_bytes(cls, bytes, transcript): + """Create speech segment from a byte string and corresponding + transcript. + + :param bytes: Byte string containing audio samples. 
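As an aside, what `FeatureNormalizer` computes can be sketched in a few lines; the array shapes and sample count below are made up, and the snippet re-implements the math rather than calling the class (whose `_compute_mean_std` body is not fully shown in this diff):

```
import numpy as np

# Illustrative stand-in: per-dimension mean/stddev over a stack of sampled
# feature matrices, then apply()-style normalization (x - mean) / (std + eps).
sampled_features = [np.random.randn(161, 200) for _ in range(8)]
stacked = np.hstack(sampled_features)
mean = np.mean(stacked, axis=1, keepdims=True)
std = np.std(stacked, axis=1, keepdims=True)

eps = 1e-14
normalized = (sampled_features[0] - mean) / (std + eps)
```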
+ :type bytes: str + :param transcript: Transcript text for the speech. + :type transript: basestring + :return: Audio segment instance. + :rtype: AudioSegment + """ + audio = AudioSegment.from_bytes(bytes) + return cls(audio.samples, audio.sample_rate, transcript) + + @property + def transcript(self): + """Return the transcript text. + + :return: Transcript text for the speech. + :rtype: basestring + """ + return self._transcript diff --git a/data_utils/utils.py b/data_utils/utils.py index 2a916b54f..3f1165718 100755 --- a/data_utils/utils.py +++ b/data_utils/utils.py @@ -1,3 +1,4 @@ +"""Contains data helper functions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -6,7 +7,21 @@ import json def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): - """Load and parse manifest file.""" + """Load and parse manifest file. + + Instances with durations outside [min_duration, max_duration] will be + filtered out. + + :param manifest_path: Manifest file to load and parse. + :type manifest_path: basestring + :param max_duration: Maximal duration in seconds for instance filter. + :type max_duration: float + :param min_duration: Minimal duration in seconds for instance filter. + :type min_duration: float + :return: Manifest parsing results. List of dict. + :rtype: list + :raises IOError: If failed to parse the manifest. + """ manifest = [] for json_line in open(manifest_path): try: diff --git a/datasets/librispeech/librispeech.py b/datasets/librispeech/librispeech.py index 1ba2a4422..faf038cc1 100644 --- a/datasets/librispeech/librispeech.py +++ b/datasets/librispeech/librispeech.py @@ -1,13 +1,14 @@ -""" - Download, unpack and create manifest json files for the Librespeech dataset. +"""Prepare Librispeech ASR datasets. - A manifest is a json file summarizing filelist in a data set, with each line - containing the meta data (i.e. audio filepath, transcription text, audio - duration) of each audio file in the data set. +Download, unpack and create manifest files. +Manifest file is a json-format file with each line containing the +meta data (i.e. audio filepath, transcript and audio duration) +of each audio file in the data set. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function -import paddle.v2 as paddle -from paddle.v2.dataset.common import md5file import distutils.util import os import wget @@ -15,6 +16,7 @@ import tarfile import argparse import soundfile import json +from paddle.v2.dataset.common import md5file DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') diff --git a/decoder.py b/decoder.py index 7c4b95263..8314885ce 100755 --- a/decoder.py +++ b/decoder.py @@ -1,9 +1,10 @@ -""" - CTC-like decoder utilitis. -""" +"""Contains various CTC decoder.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function -from itertools import groupby import numpy as np +from itertools import groupby def ctc_best_path_decode(probs_seq, vocabulary): diff --git a/infer.py b/infer.py index eb31254ce..f7c99df11 100644 --- a/infer.py +++ b/infer.py @@ -1,7 +1,4 @@ -""" - Inference for a simplifed version of Baidu DeepSpeech2 model. 
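Putting the new pieces together, a minimal end-to-end usage sketch of the featurizer API added in this patch might look as follows; the wav path, vocabulary path, and transcript are hypothetical:

```
from data_utils.featurizer.speech_featurizer import SpeechFeaturizer
from data_utils.speech import SpeechSegment

# Hypothetical inputs: any wav/flac file plus its transcript and a vocab file.
featurizer = SpeechFeaturizer(vocab_filepath='eng_vocab.txt')
segment = SpeechSegment.from_file('sample.wav', 'some transcript')
spectrogram, token_ids = featurizer.featurize(segment)
# spectrogram: 2-D ndarray of log linear-spectrogram features
# token_ids: list of char-level token indices for the transcript
```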
-""" - +"""Inferer for DeepSpeech2 model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/model.py b/model.py index 13ff829b9..cb0b4ecbb 100644 --- a/model.py +++ b/model.py @@ -1,11 +1,10 @@ -""" - A simplifed version of Baidu DeepSpeech2 model. -""" +"""Contains DeepSpeech2 model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import paddle.v2 as paddle -#TODO: add bidirectional rnn. - def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, padding, act): diff --git a/train.py b/train.py index c6aa97527..7ac4626f4 100644 --- a/train.py +++ b/train.py @@ -1,7 +1,4 @@ -""" - Trainer for a simplifed version of Baidu DeepSpeech2 model. -""" - +"""Trainer for DeepSpeech2 model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -164,7 +161,7 @@ def train(): print("\nPass: %d, Batch: %d, TrainCost: %f" % (event.pass_id, event.batch_id, cost_sum / cost_counter)) cost_sum, cost_counter = 0.0, 0 - with gzip.open("params.tar.gz", 'w') as f: + with gzip.open("params_tmp.tar.gz", 'w') as f: parameters.to_tar(f) else: sys.stdout.write('.') From 1cef98f2101b37c9ff63a02ed6955c99f5edb09e Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 13 Jun 2017 23:33:38 +0800 Subject: [PATCH 036/335] Update README.md for DS2. --- README.md | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7a372e9be..23e0b412b 100644 --- a/README.md +++ b/README.md @@ -16,34 +16,48 @@ For some machines, we also need to install libsndfile1. Details to be added. ### Preparing Data ``` -cd data -python librispeech.py -cat manifest.libri.train-* > manifest.libri.train-all +cd datasets +sh run_all.sh cd .. ``` -After running librispeech.py, we have several "manifest" json files named with a prefix `manifest.libri.`. A manifest file summarizes a speech data set, with each line containing the meta data (i.e. audio filepath, transcription text, audio duration) of each audio file within the data set, in json format. +`sh run_all.sh` prepares all ASR datasets (currently, only LibriSpeech available). After running, we have several summarization manifest files in json-format. -By `cat manifest.libri.train-* > manifest.libri.train-all`, we simply merge the three seperate sample sets of LibriSpeech (train-clean-100, train-clean-360, train-other-500) into one training set. This is a simple way for merging different data sets. +A manifest file summarizes a speech data set, with each line containing the meta data (i.e. audio filepath, transcript text, audio duration) of each audio file within the data set, in json format. Manifest file serves as an interface informing our system of where and what to read the speech samples. + + +More help for arguments: + +``` +python datasets/librispeech/librispeech.py --help +``` + +### Preparing for Training + +``` +python compute_mean_std.py +``` + +`python compute_mean_std.py` computes mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. 
More help for arguments: ``` -python librispeech.py --help +python compute_mean_std.py --help ``` -### Traininig +### Training For GPU Training: ``` -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --trainer_count 4 --train_manifest_path ./data/manifest.libri.train-all +CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --trainer_count 4 ``` For CPU Training: ``` -python train.py --trainer_count 8 --use_gpu False -- train_manifest_path ./data/manifest.libri.train-all +python train.py --trainer_count 8 --use_gpu False ``` More help for arguments: @@ -55,7 +69,7 @@ python train.py --help ### Inferencing ``` -python infer.py +CUDA_VISIBLE_DEVICES=0 python infer.py ``` More help for arguments: From f85f8558cf8fb6b3037f6d0c4b4be8dd30afdc0d Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 14 Jun 2017 15:00:10 +0800 Subject: [PATCH 037/335] Add unittest. --- tests/test_error_rate.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/test_error_rate.py diff --git a/tests/test_error_rate.py b/tests/test_error_rate.py new file mode 100644 index 000000000..bb6dca30a --- /dev/null +++ b/tests/test_error_rate.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +import unittest +import sys +sys.path.append('..') +import error_rate + + +class TestParse(unittest.TestCase): + def test_wer(self): + ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' + hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night' + word_error_rate = error_rate.wer(ref, hyp) + self.assertTrue(abs(word_error_rate - 0.769230769231) < 1e-6) + + def test_cer_en(self): + ref = 'werewolf' + hyp = 'weae wolf' + char_error_rate = error_rate.cer(ref, hyp) + self.assertTrue(abs(char_error_rate - 0.25) < 1e-6) + + def test_cer_zh(self): + ref = u'我是中国人' + hyp = u'我是 美洲人' + char_error_rate = error_rate.cer(ref, hyp) + self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) + + +if __name__ == '__main__': + unittest.main() From 04a225ae4f8f7f4af068207627bb65b93bdd5fe6 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 14 Jun 2017 18:14:50 +0800 Subject: [PATCH 038/335] Enable min_batch_num in train.py and update train info print. 
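For context on the unit tests added above: `error_rate.wer` is, in essence, a word-level edit distance divided by the reference length, and `cer` applies the same idea to character tokens. The sketch below is a minimal reference implementation for illustration only, not the project's `error_rate.py`:

```
def wer_sketch(reference, hypothesis):
    """Word error rate via Levenshtein distance over word tokens."""
    ref, hyp = reference.split(), hypothesis.split()
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # substitution
    return float(d[len(ref)][len(hyp)]) / len(ref)
```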
--- compute_mean_std.py | 0 data_utils/__init__.py | 0 data_utils/audio.py | 0 data_utils/augmentor/__init__.py | 0 data_utils/augmentor/augmentation.py | 0 data_utils/augmentor/base.py | 0 data_utils/augmentor/volume_perturb.py | 0 data_utils/featurizer/__init__.py | 0 data_utils/featurizer/audio_featurizer.py | 0 data_utils/featurizer/speech_featurizer.py | 0 data_utils/featurizer/text_featurizer.py | 0 data_utils/normalizer.py | 0 data_utils/speech.py | 0 data_utils/utils.py | 0 datasets/run_all.sh | 0 decoder.py | 0 train.py | 10 ++++++---- 17 files changed, 6 insertions(+), 4 deletions(-) mode change 100755 => 100644 compute_mean_std.py mode change 100755 => 100644 data_utils/__init__.py mode change 100755 => 100644 data_utils/audio.py mode change 100755 => 100644 data_utils/augmentor/__init__.py mode change 100755 => 100644 data_utils/augmentor/augmentation.py mode change 100755 => 100644 data_utils/augmentor/base.py mode change 100755 => 100644 data_utils/augmentor/volume_perturb.py mode change 100755 => 100644 data_utils/featurizer/__init__.py mode change 100755 => 100644 data_utils/featurizer/audio_featurizer.py mode change 100755 => 100644 data_utils/featurizer/speech_featurizer.py mode change 100755 => 100644 data_utils/featurizer/text_featurizer.py mode change 100755 => 100644 data_utils/normalizer.py mode change 100755 => 100644 data_utils/speech.py mode change 100755 => 100644 data_utils/utils.py mode change 100755 => 100644 datasets/run_all.sh mode change 100755 => 100644 decoder.py diff --git a/compute_mean_std.py b/compute_mean_std.py old mode 100755 new mode 100644 diff --git a/data_utils/__init__.py b/data_utils/__init__.py old mode 100755 new mode 100644 diff --git a/data_utils/audio.py b/data_utils/audio.py old mode 100755 new mode 100644 diff --git a/data_utils/augmentor/__init__.py b/data_utils/augmentor/__init__.py old mode 100755 new mode 100644 diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py old mode 100755 new mode 100644 diff --git a/data_utils/augmentor/base.py b/data_utils/augmentor/base.py old mode 100755 new mode 100644 diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py old mode 100755 new mode 100644 diff --git a/data_utils/featurizer/__init__.py b/data_utils/featurizer/__init__.py old mode 100755 new mode 100644 diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py old mode 100755 new mode 100644 diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py old mode 100755 new mode 100644 diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py old mode 100755 new mode 100644 diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py old mode 100755 new mode 100644 diff --git a/data_utils/speech.py b/data_utils/speech.py old mode 100755 new mode 100644 diff --git a/data_utils/utils.py b/data_utils/utils.py old mode 100755 new mode 100644 diff --git a/datasets/run_all.sh b/datasets/run_all.sh old mode 100755 new mode 100644 diff --git a/decoder.py b/decoder.py old mode 100755 new mode 100644 diff --git a/train.py b/train.py index 7ac4626f4..6074aa358 100644 --- a/train.py +++ b/train.py @@ -143,11 +143,13 @@ def train(): train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest_path, batch_size=args.batch_size, + min_batch_size=args.trainer_count, sortagrad=args.use_sortagrad if args.init_model_path is None else 
False, batch_shuffle=True) test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, + min_batch_size=1, # must be 1, but will have errors. sortagrad=False, batch_shuffle=False) @@ -157,11 +159,11 @@ def train(): if isinstance(event, paddle.event.EndIteration): cost_sum += event.cost cost_counter += 1 - if event.batch_id % 50 == 0: - print("\nPass: %d, Batch: %d, TrainCost: %f" % - (event.pass_id, event.batch_id, cost_sum / cost_counter)) + if (event.batch_id + 1) % 100 == 0: + print("\nPass: %d, Batch: %d, TrainCost: %f" % ( + event.pass_id, event.batch_id + 1, cost_sum / cost_counter)) cost_sum, cost_counter = 0.0, 0 - with gzip.open("params_tmp.tar.gz", 'w') as f: + with gzip.open("params.tar.gz", 'w') as f: parameters.to_tar(f) else: sys.stdout.write('.') From 65e34c535b4444c42c28f14b16a2617a73d296d1 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Thu, 15 Jun 2017 03:08:30 +0800 Subject: [PATCH 039/335] add augmentation --- data_utils/audio.py | 396 ++++++++++++++++- data_utils/augmentor/audio_database.py | 401 ++++++++++++++++++ data_utils/augmentor/augmentation.py | 15 + data_utils/augmentor/implus_response.py | 76 ++++ data_utils/augmentor/noise_speech.py | 318 ++++++++++++++ .../online_bayesian_normalization.py | 57 +++ data_utils/augmentor/resampler.py | 30 ++ data_utils/augmentor/speed_perturb.py | 53 +++ data_utils/augmentor/volume_perturb.py | 4 +- 9 files changed, 1337 insertions(+), 13 deletions(-) create mode 100755 data_utils/augmentor/audio_database.py create mode 100755 data_utils/augmentor/implus_response.py create mode 100755 data_utils/augmentor/noise_speech.py create mode 100755 data_utils/augmentor/online_bayesian_normalization.py create mode 100755 data_utils/augmentor/resampler.py create mode 100755 data_utils/augmentor/speed_perturb.py diff --git a/data_utils/audio.py b/data_utils/audio.py index 916c8ac1a..aef13c30f 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -6,6 +6,8 @@ from __future__ import print_function import numpy as np import io import soundfile +import scikits.samplerate +from scipy import signal class AudioSegment(object): @@ -62,6 +64,69 @@ class AudioSegment(object): samples, sample_rate = soundfile.read(file, dtype='float32') return cls(samples, sample_rate) + @classmethod + def slice_from_file(cls, fname, start=None, end=None): + """ + Loads a small section of an audio without having to load + the entire file into the memory which can be incredibly wasteful. + + :param fname: input audio file name + :type fname: bsaestring + :param start: start time in seconds (supported granularity is ms) + If start is negative, it wraps around from the end. If not + provided, this function reads from the very beginning. + :type start: float + :param end: start time in seconds (supported granularity is ms) + If end is negative, it wraps around from the end. If not + provided, the default behvaior is to read to the end of the + file. + :type end: float + + :return:the specified slice of input audio in the audio.AudioSegment + format. 
+ """ + sndfile = soundfile.SoundFile(fname) + + sample_rate = sndfile.samplerate + if sndfile.channels != 1: + raise TypeError("{} has more than 1 channel.".format(fname)) + + duration = float(len(sndfile)) / sample_rate + + if start is None: + start = 0.0 + if end is None: + end = duration + + if start < 0.0: + start += duration + if end < 0.0: + end += duration + + if start < 0.0: + raise IndexError("The slice start position ({} s) is out of " + "bounds. Filename: {}".format(start, fname)) + if end < 0.0: + raise IndexError("The slice end position ({} s) is out of bounds " + "Filename: {}".format(end, fname)) + + if start > end: + raise IndexError("The slice start position ({} s) is later than " + "the slice end position ({} s)." + .format(start, end)) + + if end > duration: + raise ValueError("The slice end time ({} s) is out of " + "bounds (> {} s) Filename: {}" + .format(end, duration, fname)) + + start_frame = int(start * sample_rate) + end_frame = int(end * sample_rate) + sndfile.seek(start_frame) + data = sndfile.read(frames=end_frame - start_frame, dtype='float32') + + return cls(data, sample_rate) + @classmethod def from_bytes(cls, bytes): """Create audio segment from a byte string containing audio samples. @@ -75,6 +140,44 @@ class AudioSegment(object): io.BytesIO(bytes), dtype='float32') return cls(samples, sample_rate) + @classmethod + def make_silence(cls, duration, sample_rate): + """Creates a silent audio segment of the given duration and + sample rate. + + :param duration: length of silence in seconds + :type duration: scalar + :param sample_rate: sample rate + :type sample_rate: scalar + :returns: silence of the given duration + :rtype: AudioSegment + """ + samples = np.zeros(int(float(duration) * sample_rate)) + return cls(samples, sample_rate) + + @classmethod + def concatenate(cls, *segments): + """Concatenate an arbitrary number of audio segments together. + + :param *segments: input audio segments + :type *segments: [AudioSegment] + """ + # Perform basic sanity-checks. + N = len(segments) + if N == 0: + raise ValueError("No audio segments are given to concatenate.") + sample_rate = segments[0]._sample_rate + for segment in segments: + if sample_rate != segment._sample_rate: + raise ValueError("Can't concatenate segments with " + "different sample rates") + if type(segment) is not cls: + raise TypeError("Only audio segments of the same type " + "instance can be concatenated.") + + samples = np.concatenate([seg.samples for seg in segments]) + return cls(samples, sample_rate) + def to_wav_file(self, filepath, dtype='float32'): """Save audio segment to disk as wav file. @@ -143,23 +246,288 @@ class AudioSegment(object): new_indices = np.linspace(start=0, stop=old_length, num=new_length) self._samples = np.interp(new_indices, old_indices, self._samples) - def normalize(self, target_sample_rate): - raise NotImplementedError() + def normalize(self, target_db=-20, max_gain_db=300.0): + """Normalize audio to desired RMS value in decibels. + + Note that this is an in-place transformation. + + :param target_db: Target RMS value in decibels.This value + should be less than 0.0 as 0.0 is full-scale audio. + :type target_db: float, optional + :param max_gain_db: Max amount of gain in dB that can be applied + for normalization. This is to prevent nans when attempting + to normalize a signal consisting of all zeros. 
+ :type max_gain_db: float, optional - def resample(self, target_sample_rate): - raise NotImplementedError() + :raises NormalizationWarning: if the required gain to normalize the + segment to the target_db value exceeds max_gain_db. + """ + gain = target_db - self.rms_db + if gain > max_gain_db: + raise ValueError( + "Unable to normalize segment to {} dB because it has an RMS " + "value of {} dB and the difference exceeds max_gain_db ({} dB)" + .format(target_db, self.rms_db, max_gain_db)) + gain = min(max_gain_db, target_db - self.rms_db) + self.apply_gain(gain) + + def normalize_online_bayesian(self, + target_db, + prior_db, + prior_samples, + startup_delay=0.0): + """ + Normalize audio using a production-compatible online/causal algorithm. + This uses an exponential likelihood and gamma prior to make + online estimates of the RMS even when there are very few samples. + + Note that this is an in-place transformation. + + :param target_db: Target RMS value in decibels + :type target_bd: scalar + :param prior_db: Prior RMS estimate in decibels + :type prior_db: scalar + :param prior_samples: Prior strength in number of samples + :type prior_samples: scalar + :param startup_delay: Default: 0.0 s. If provided, this + function will accrue statistics for the first startup_delay + seconds before applying online normalization. + :type startup_delay: scalar + """ + # Estimate total RMS online + startup_sample_idx = min(self.num_samples - 1, + int(self.sample_rate * startup_delay)) + prior_mean_squared = 10.**(prior_db / 10.) + prior_sum_of_squares = prior_mean_squared * prior_samples + cumsum_of_squares = np.cumsum(self.samples**2) + sample_count = np.arange(len(self)) + 1 + if startup_sample_idx > 0: + cumsum_of_squares[:startup_sample_idx] = \ + cumsum_of_squares[startup_sample_idx] + sample_count[:startup_sample_idx] = \ + sample_count[startup_sample_idx] + mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) / + (sample_count + prior_samples)) + rms_estimate_db = 10 * np.log10(mean_squared_estimate) + + # Compute required time-varying gain + gain_db = target_db - rms_estimate_db + + # Apply gain to new segment + self.apply_gain(gain_db) + + def normalize_ewma(self, + target_db, + decay_rate, + startup_delay, + rms_eps=1e-6, + max_gain_db=300.0): + startup_sample_idx = min(self.num_samples - 1, + int(self.sample_rate * startup_delay)) + mean_sq = self.samples**2 + if startup_sample_idx > 0: + mean_sq[:startup_sample_idx] = \ + np.sum(mean_sq[:startup_sample_idx]) / startup_sample_idx + idx_start = max(0, startup_sample_idx - 1) + initial_condition = mean_sq[idx_start] * decay_rate + mean_sq[idx_start:] = lfilter( + [1.0 - decay_rate], [1.0, -decay_rate], + mean_sq[idx_start:], + axis=0, + zi=[initial_condition])[0] + rms_estimate_db = 10.0 * np.log10(mean_sq + rms_eps) + gain_db = target_db - rms_estimate_db + if np.any(gain_db > max_gain_db): + warnings.warn( + "Unable to normalize segment to {} dB because it has an RMS " + "value of {} dB and the difference exceeds max_gain_db ({} dB)" + .format(target_db, self.rms_db, max_gain_db), + NormalizationWarning) + gain_db = np.minimum(gain_db, max_gain_db) + self.apply_gain(gain_db) + + def resample(self, target_sample_rate, quality='sinc_medium'): + """Resample audio and return new AudioSegment. + This resamples the audio to a new sample rate and returns a brand + new AudioSegment. The existing AudioSegment is unchanged. + + Note that this is an in-place transformation. 
+ + :param new_sample_rate: target sample rate + :type new_sample_rate: scalar + :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. + Sets resampling speed/quality tradeoff. + See http://www.mega-nerd.com/SRC/api_misc.html#Converters + :type quality: basestring + """ + resample_ratio = target_sample_rate / self._sample_rate + new_samples = scikits.samplerate.resample( + self._samples, r=resample_ratio, type=quality) + self._samples = new_samples + self._sample_rate = new_sample_rate def pad_silence(self, duration, sides='both'): - raise NotImplementedError() + """Pads this audio sample with a period of silence. + + Note that this is an in-place transformation. + + :param duration: length of silence in seconds to pad + :type duration: float + :param sides: + 'beginning' - adds silence in the beginning + 'end' - adds silence in the end + 'both' - adds silence in both the beginning and the end. + :type sides: basestring + """ + if duration == 0.0: + return self + cls = type(self) + silence = cls.make_silence(duration, self._sample_rate) + if sides == "beginning": + padded = cls.concatenate(silence, self) + elif sides == "end": + padded = cls.concatenate(self, silence) + elif sides == "both": + padded = cls.concatenate(silence, self, silence) + else: + raise ValueError("Unknown value for the kwarg 'sides'") + self._samples = padded._samples + self._sample_rate = padded._sample_rate def subsegment(self, start_sec=None, end_sec=None): - raise NotImplementedError() + """Return new AudioSegment containing audio between given boundaries. + + :param start_sec: Beginning of subsegment in seconds, + (beginning of segment if None). + :type start_sec: scalar + :param end_sec: End of subsegment in seconds, + (end of segment if None). + :type end_sec: scalar + + :return: New AudioSegment containing specified + subsegment. + :trype: AudioSegment + """ + # Default boundaries + if start_sec is None: + start_sec = 0.0 + if end_sec is None: + end_sec = self.duration + + # negative boundaries are relative to end of segment + if start_sec < 0.0: + start_sec = self.duration + start_sec + if end_sec < 0.0: + end_sec = self.duration + end_sec - def convolve(self, filter, allow_resample=False): - raise NotImplementedError() + start_sample = int(round(start_sec * self._sample_rate)) + end_sample = int(round(end_sec * self._sample_rate)) + samples = self._samples[start_sample:end_sample] - def convolve_and_normalize(self, filter, allow_resample=False): - raise NotImplementedError() + return type(self)(samples, sample_rate=self._sample_rate) + + def random_subsegment(self, subsegment_length, rng=None): + """ + Return a random subsegment of a specified length in seconds. + + :param subsegment_length: Subsegment length in seconds. + :type subsegment_length: scalar + :param rng: Random number generator state + :type rng: random.Random [optional] + + + :return:clip (SpeechDLSegment): New SpeechDLSegmen containing random + subsegment of original segment. + """ + if rng is None: + rng = random.Random() + + if subsegment_length > self.duration: + raise ValueError("Length of subsegment must not be greater " + "than original segment.") + start_time = rng.uniform(0.0, self.duration - subsegment_length) + return self.subsegment(start_time, start_time + subsegment_length) + + def convolve(self, ir, allow_resampling=False): + """Convolve this audio segment with the given filter. 
+ + :param ir: impulse response + :type ir: AudioSegment + :param allow_resampling: indicates whether resampling is allowed + when the ir has a different sample rate from this signal. + :type allow_resampling: boolean + """ + if allow_resampling and self.sample_rate != ir.sample_rate: + ir = ir.resample(self.sample_rate) + + if self.sample_rate != ir.sample_rate: + raise ValueError("Impulse response sample rate ({}Hz) is " + "equal to base signal sample rate ({}Hz)." + .format(ir.sample_rate, self.sample_rate)) + + samples = signal.fftconvolve(self.samples, ir.samples, "full") + self._samples = samples + + def convolve_and_normalize(self, ir, allow_resample=False): + """Convolve and normalize the resulting audio segment so that it + has the same average power as the input signal. + + :param ir: impulse response + :type ir: AudioSegment + :param allow_resampling: indicates whether resampling is allowed + when the ir has a different sample rate from this signal. + :type allow_resampling: boolean + """ + self.convolve(ir, allow_resampling=allow_resampling) + self.normalize(target_db=self.rms_db) + + def add_noise(self, + noise, + snr_dB, + allow_downsampling=False, + max_gain_db=300.0, + rng=None): + """Adds the given noise segment at a specific signal-to-noise ratio. + If the noise segment is longer than this segment, a random subsegment + of matching length is sampled from it and used instead. + + :param noise: Noise signal to add. + :type noise: SpeechDLSegment + :param snr_dB: Signal-to-Noise Ratio, in decibels. + :type snr_dB: scalar + :param allow_downsampling: whether to allow the noise signal + to be downsampled to match the base signal sample rate. + :type allow_downsampling: boolean + :param max_gain_db: Maximum amount of gain to apply to noise + signal before adding it in. This is to prevent attempting + to apply infinite gain to a zero signal. + :type max_gain_db: scalar + :param rng: Random number generator state. + :type rng: random.Random + + Returns: + SpeechDLSegment: signal with noise added. + """ + if rng is None: + rng = random.Random() + + if allow_downsampling and noise.sample_rate > self.sample_rate: + noise = noise.resample(self.sample_rate) + + if noise.sample_rate != self.sample_rate: + raise ValueError("Noise sample rate ({}Hz) is not equal to " + "base signal sample rate ({}Hz)." + .format(noise.sample_rate, self.sample_rate)) + if noise.duration < self.duration: + raise ValueError("Noise signal ({} sec) must be at " + "least as long as base signal ({} sec)." + .format(noise.duration, self.duration)) + noise_gain_db = self.rms_db - noise.rms_db - snr_dB + noise_gain_db = min(max_gain_db, noise_gain_db) + noise_subsegment = noise.random_subsegment(self.duration, rng=rng) + output = self + self.tranform_noise(noise_subsegment, noise_gain_db) + self._samples = output._samples + self._sample_rate = output._sample_rate @property def samples(self): @@ -186,7 +554,7 @@ class AudioSegment(object): :return: Number of samples. :rtype: int """ - return self._samples.shape(0) + return self._samples.shape[0] @property def duration(self): @@ -250,3 +618,9 @@ class AudioSegment(object): else: raise TypeError("Unsupported sample type: %s." 
% samples.dtype) return output_samples.astype(dtype) + + def tranform_noise(self, noise_subsegment, noise_gain_db): + """ tranform noise file + """ + return type(self)(noise_subsegment._samples * (10.**( + noise_gain_db / 20.)), noise_subsegment._sample_rate) diff --git a/data_utils/augmentor/audio_database.py b/data_utils/augmentor/audio_database.py new file mode 100755 index 000000000..e41c6dd72 --- /dev/null +++ b/data_utils/augmentor/audio_database.py @@ -0,0 +1,401 @@ +from __future__ import print_function +from collections import defaultdict +import bisect +import logging +import numpy as np +import os +import random +import sys + +UNK_TAG = "" + + +def stream_audio_index(fname, UNK=UNK_TAG): + """Reads an audio index file and emits one record in the index at a time. + + :param fname: audio index path + :type fname: basestring + :param UNK: UNK token to denote that certain audios are not tagged. + :type UNK: basesring + + Yields: + idx, duration, size, relpath, tags (int, float, int, str, list(str)): + audio file id, length of the audio in seconds, size in byte, + relative path w.r.t. to the root noise directory, list of tags + """ + with open(fname) as audio_index_file: + for i, line in enumerate(audio_index_file): + tok = line.strip().split("\t") + assert len(tok) >= 4, \ + "Invalid line at line {} in file {}".format( + i + 1, audio_index_file) + idx = int(tok[0]) + duration = float(tok[1]) + # Sometimes, the duration can round down to 0.0 + assert duration >= 0.0, \ + "Invalid duration at line {} in file {}".format( + i + 1, audio_index_file) + size = int(tok[2]) + assert size > 0, \ + "Invalid size at line {} in file {}".format( + i + 1, audio_index_file) + relpath = tok[3] + if len(tok) == 4: + tags = [UNK_TAG] + else: + tags = tok[4:] + yield idx, duration, size, relpath, tags + + +def truncate_float(val, ndigits=6): + """ Truncates a floating-point value to have the desired number of + digits after the decimal point. + + :param val: input value. + :type val: float + :parma ndigits: desired number of digits. + :type ndigits: int + + :return: truncated value + :rtype: float + """ + p = 10.0**ndigits + return float(int(val * p)) / p + + +def print_audio_index(idx, duration, size, relpath, tags, file=sys.stdout): + """Prints an audio record to the index file. + + :param idx: Audio file id. + :type idx: int + :param duration: length of the audio in seconds + :type duration: float + :param size: size of the file in bytes + :type size: int + :param relpath: relative path w.r.t. to the root noise directory. + :type relpath: basestring + :parma tags: list of tags + :parma tags: list(str) + :parma file: file to which we want to write an audio record. + :type file: sys.stdout + """ + file.write("{}\t{:.6f}\t{}\t{}" + .format(idx, truncate_float(duration, ndigits=6), size, relpath)) + for tag in tags: + file.write("\t{}".format(tag)) + file.write("\n") + + +class AudioIndex(object): + """ In-memory index of audio files that do not have annotations. + This supports duration-based sampling and sampling from a target + distribution. 
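The index file consumed by `stream_audio_index` above is plain tab-separated text: audio id, duration in seconds, size in bytes, path relative to the noise root, then zero or more tags. An illustrative record (values and tags invented):

```
0	4.325000	138412	freesound/ambience/street_01.wav	ambient	street
```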
+ + Each line in the index file consists of the following fields: + (id (int), duration (float), size (int), relative path (str), + list of tags ([str])) + """ + + def __init__(self): + self.audio_dir = None + self.index_fname = None + self.tags = None + self.bin_size = 2.0 + self.clear() + + def clear(self): + """ Clears the index + + Returns: + None + """ + self.idx_to_record = {} + # The list of indices correspond to audio files whose duration is + # greater than or equal to the key. + self.duration_to_id_set = {} + self.duration_to_id_set_per_tag = defaultdict(lambda: {}) + self.duration_to_list = defaultdict(lambda: []) + self.duration_to_list_per_tag = defaultdict( + lambda: defaultdict(lambda: [])) + self.tag_to_id_set = defaultdict(lambda: set()) + self.shared_duration_bins = [] + self.id_set_complete = set() + self.id_set = set() + self.duration_bins = [] + + def has_audio(self, distr=None): + """ + :param distr: The target distribution of audio tags that we want to + match. If this is not supplied, the function simply checks that + there are some audio files. + :parma distr: dict + :return: True if there are audio files. + :rtype: boolean + """ + if distr is None: + return len(self.id_set) > 0 + else: + for tag in distr: + if tag not in self.duration_to_list_per_tag: + return False + return True + + def _load_all_records_from_disk(self, audio_dir, idx_fname, bin_size): + """Loads all audio records from the disk into memory and groups them + into chunks based on their duration and the bin_size granalarity. + + Once all the records are read, indices are built from these records + by another function so that the audio samples can be drawn efficiently. + + Updates: + self.audio_dir (path): audio root directory + self.idx_fname (path): audio database index filename + self.bin_size (float): granularity of bins + self.idx_to_record (dict): maps from the audio id to + (duration, file_size, relative_path, tags) + self.tag_to_id_set (dict): maps from the tag to + the set of id's of audios that have this tag. + self.id_set_complete (set): set of all audio id's in the index file + self.min_duration (float): minimum audio duration observed in the + index file + self.duration_bins (list): the lower bounds on the duration of + audio files falling in each bin + self.duration_to_id_set (dict): contains (k, v) where v is the set + of id's of audios whose lengths are longer than or equal to k. + (e.g. k is the duration lower bound of this bin). + self.duration_to_id_set_per_tag (dict): Something like above but + has a finer granularity mapping from the tag to + duration_to_id_set. + self.shared_duration_bins (list): list of sets where each set + contains duration lower bounds whose audio id sets are the + same. The rationale for having this is that there are a few + but extremely long audio files which lead to a lot of bins. + When the id sets do not change across various minimum duration + boundaries, we + cluster these together and make them point to the same id set + reference. + + :return: whether the records were read from the disk. The assumption is + that the audio index file on disk and the actual audio files + are constructed once and never change during training. We only + re-read when either the directory or the index file path change. + """ + if self.audio_dir == audio_dir and self.idx_fname == idx_fname and \ + self.bin_size == bin_size: + # The audio directory and/or the list of audio files + # haven't changed. No need to load the list again. 
+ return False + + # Remember where the audio index is most recently read from. + self.audio_dir = audio_dir + self.idx_fname = idx_fname + self.bin_size = bin_size + + # Read in the idx and compute the number of bins necessary + self.clear() + rank = [] + min_duration = float('inf') + max_duration = float('-inf') + for idx, duration, file_size, relpath, tags in \ + stream_audio_index(idx_fname): + self.idx_to_record[idx] = (duration, file_size, relpath, tags) + max_duration = max(max_duration, duration) + min_duration = min(min_duration, duration) + rank.append((duration, idx)) + for tag in tags: + self.tag_to_id_set[tag].add(idx) + if len(rank) == 0: + # file is empty + raise IOError("Index file {} is empty".format(idx_fname)) + for tag in self.tag_to_id_set: + self.id_set_complete |= self.tag_to_id_set[tag] + dur = min_duration + self.min_duration = min_duration + while dur < max_duration + bin_size: + self.duration_bins.append(dur) + dur += bin_size + + # Sort in decreasing order of duration and populate + # the cumulative indices lists. + rank.sort(reverse=True) + + # These are indices for `rank` and used to keep track of whether + # there are new records to add in the current bin. + last = 0 + cur = 0 + + # The set of audios falling in the previous bin; in the case, + # where we don't find new audios for the current bin, we store + # the reference to the last set so as to conserve memory. + # This is not such a big problem if the audio duration is + # bounded by a small number like 30 seconds and the + # bin size is big enough. But, for raw freesound audios, + # some audios can be as long as a few hours! + last_audio_set = set() + + # The same but for each tag so that we can pick audios based on + # tags and also some user-specified tag distribution. + last_audio_set_per_tag = defaultdict(lambda: set()) + + # Set of lists of bins sharing the same audio sets. + shared = set() + + for i in range(len(self.duration_bins) - 1, -1, -1): + lower_bound = self.duration_bins[i] + new_audio_idxs = set() + new_audio_idxs_per_tag = defaultdict(lambda: set()) + while cur < len(rank) and rank[cur][0] >= lower_bound: + idx = rank[cur][1] + tags = self.idx_to_record[idx][3] + new_audio_idxs.add(idx) + for tag in tags: + new_audio_idxs_per_tag[tag].add(idx) + cur += 1 + # This makes certain that the same list is shared across + # different bins if no new indices are added. + if cur == last: + shared.add(lower_bound) + else: + last_audio_set = last_audio_set | new_audio_idxs + for tag in new_audio_idxs_per_tag: + last_audio_set_per_tag[tag] = \ + last_audio_set_per_tag[tag] | \ + new_audio_idxs_per_tag[tag] + if len(shared) > 0: + self.shared_duration_bins.append(shared) + shared = set([lower_bound]) + ### last_audio_set = set() should set blank + last = cur + self.duration_to_id_set[lower_bound] = last_audio_set + for tag in last_audio_set_per_tag: + self.duration_to_id_set_per_tag[lower_bound][tag] = \ + last_audio_set_per_tag[tag] + + # The last `shared` record isn't added to the `shared_duration_bins`. + self.shared_duration_bins.append(shared) + + # We make sure that the while loop above has exhausted through the + # `rank` list by checking if the `cur`rent index in `rank` equals + # the length of the array, which is the halting condition. + assert cur == len(rank) + + return True + + def _build_index_from_records(self, tag_list): + """ Uses the in-memory records read from the index file to build + an in-memory index restricted to the given tag list. 
+ + :param tag_list: List of tags we are interested in sampling from. + :type tag_list: list(str) + + Updates: + self.id_set (set): the set of all audio id's that can be sampled. + self.duration_to_list (dict): maps from the duration lower bound + to the id's of audios longer than this duration. + self.duration_to_list_per_tag (dict): maps from the tag to + the same structure as self.duration_to_list. This is to support + sampling from a target noise distribution. + + :return: whether the index was built from scratch + """ + if self.tags == tag_list: + return False + + self.tags = tag_list + if len(tag_list) == 0: + self.id_set = self.id_set_complete + else: + self.id_set = set() + for tag in tag_list: + self.id_set |= self.tag_to_id_set[tag] + + # Next, we need to take a subset of the audio files + for shared in self.shared_duration_bins: + # All bins in `shared' have the same index lists + # so we can intersect once and set all of them to this list. + lb = list(shared)[0] + intersected = list(self.id_set & self.duration_to_id_set[lb]) + duration_to_id_set = self.duration_to_id_set_per_tag[lb] + intersected_per_tag = { + tag: self.tag_to_id_set[tag] & duration_to_id_set[tag] + for tag in duration_to_id_set + } + for bin_key in shared: + self.duration_to_list[bin_key] = intersected + for tag in intersected_per_tag: + self.duration_to_list_per_tag[tag][bin_key] = \ + intersected_per_tag[tag] + assert len(self.duration_to_list) == len(self.duration_to_id_set) + return True + + def refresh_records_from_index_file(self, + audio_dir, + idx_fname, + tag_list, + bin_size=2.0): + """ Loads the index file and populates the records + for building the internal index. + + If the audio directory or index file name has changed, the whole index + is reloaded from scratch. If only the tag_list is changed, then the + desired index is built from the complete, in-memory record. + + :param audio_dir: audio directory + :type audio_dir: basestring + :param idx_fname: audio index file name + :type idex_fname: basestring + :param tag_list: list of tags we are interested in loading; + if empty, we load all. + :type tag_list: list + :param bin_size: optional argument for controlling the granularity + of duration bins + :type bin_size: float + """ + if tag_list is None: + tag_list = [] + reloaded_records = self._load_all_records_from_disk(audio_dir, + idx_fname, bin_size) + if reloaded_records or self.tags != tag_list: + self._build_index_from_records(tag_list) + logger.info('loaded {} audio files from {}' + .format(len(self.id_set), idx_fname)) + + def sample_audio(self, duration, rng=None, distr=None): + """ Uniformly draws an audio record of at least the desired duration + + :param duration: minimum desired audio duration + :type duration: float + :param rng: random number generator + :type rng: random.Random + :param distr: target distribution of audio tags. If not provided, + :type distr: dict + all audio files are sampled uniformly at random. + + :returns: success, (duration, file_size, path) + """ + if duration < 0.0: + duration = self.min_duration + i = bisect.bisect_left(self.duration_bins, duration) + if i == len(self.duration_bins): + return False, None + bin_key = self.duration_bins[i] + if distr is None: + indices = self.duration_to_list[bin_key] + else: + # If a desired audio distribution is given, we sample from it. 
+ if rng is None: + rng = random.Random() + nprng = np.random.RandomState(rng.getrandbits(32)) + prob_masses = distr.values() + prob_masses /= np.sum(prob_masses) + tag = nprng.choice(distr.keys(), p=prob_masses) + indices = self.duration_to_list_per_tag[tag][bin_key] + if len(indices) == 0: + return False, None + else: + if rng is None: + rng = random.Random() + # duration, file size and relative path from root + s = self.idx_to_record[rng.sample(indices, 1)[0]] + s = (s[0], s[1], os.path.join(self.audio_dir, s[2])) + return True, s diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index abe1a0ec8..c0a70ad18 100755 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -6,6 +6,11 @@ from __future__ import print_function import json import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor +from data_utils.augmentor.resamler import ResamplerAugmentor +from data_utils.augmentor.speed_perturb import SpeedPerturbatioAugmentor +from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor +from data_utils.augmentor.Impulse_response import ImpulseResponseAugmentor +from data_utils.augmentor.noise_speech import NoiseSpeechAugmentor class AugmentationPipeline(object): @@ -76,5 +81,15 @@ class AugmentationPipeline(object): """Return an augmentation model by the type name, and pass in params.""" if augmentor_type == "volume": return VolumePerturbAugmentor(self._rng, **params) + if augmentor_type == "resamle": + return ResamplerAugmentor(self._rng, **params) + if augmentor_type == "speed": + return SpeedPerturbatioAugmentor(self._rng, **params) + if augmentor_type == "online_bayesian_normalization": + return OnlineBayesianNormalizationAugmentor(self._rng, **params) + if augmentor_type == "Impulse_response": + return ImpulseResponseAugmentor(self._rng, **params) + if augmentor_type == "noise_speech": + return NoiseSpeechAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/data_utils/augmentor/implus_response.py b/data_utils/augmentor/implus_response.py new file mode 100755 index 000000000..cc2053421 --- /dev/null +++ b/data_utils/augmentor/implus_response.py @@ -0,0 +1,76 @@ +""" Impulse response""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from . import base +from . import audio_database +from data_utils.speech import SpeechSegment + + +class ImpulseResponseAugmentor(base.AugmentorBase): + """ Instantiates an impulse response model + + :param ir_dir: directory containing impulse responses + :type ir_dir: basestring + :param tags: optional parameter for specifying what + particular impulse responses to apply. + :type tags: list + :parm tag_distr: optional noise distribution + :type tag_distr: dict + """ + + def __init__(self, rng, ir_dir, index_file, tags=None, tag_distr=None): + # Define all required parameter maps here. + self.ir_dir = ir_dir + self.index_file = index_file + + self.tags = tags + self.tag_distr = tag_distr + + self.audio_index = audio_database.AudioIndex() + self.rng = rng + + def _init_data(self): + """ Preloads stuff from disk in an attempt (e.g. list of files, etc) + to make later loading faster. If the data configuration remains the + same, this function does nothing. 
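For orientation, the type names registered in `_get_augmentor` earlier in this patch ("volume", "resamle", "speed", "online_bayesian_normalization", "Impulse_response", "noise_speech") are what an augmentation configuration refers to. The sketch below shows one plausible configuration; the `params`/`prob` keys and the individual parameter names are assumptions inferred from the augmentor constructors, so the pipeline docstring remains the authority on the exact schema:

```
import json

# Hypothetical augmentation pipeline configuration; parameter names are
# assumptions and the noise directory path is a placeholder.
config = json.dumps([
    {"type": "volume",
     "params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},
     "prob": 0.5},
    {"type": "noise_speech",
     "params": {"snr_min": 5.0, "snr_max": 30.0,
                "noise_dir": "/path/to/noise", "source": "turk"},
     "prob": 0.3},
])
```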
+ + """ + self.audio_index.refresh_records_from_index_file( + self.ir_dir, self.index_file, self.tags) + + def transform_audio(self, audio_segment): + """ Convolves the input audio with an impulse response. + + :param audio_segment: input audio + :type audio_segment: AudioSegemnt + """ + # This handles the cases where the data source or directories change. + self._init_data() + + read_size = 0 + tag_distr = self.tag_distr + if not self.audio_index.has_audio(tag_distr): + if tag_distr is None: + if not self.tags: + raise RuntimeError("The ir index does not have audio " + "files to sample from.") + else: + raise RuntimeError("The ir index does not have audio " + "files of the given tags to sample " + "from.") + else: + raise RuntimeError("The ir index does not have audio " + "files to match the target ir " + "distribution.") + else: + # Querying with a negative duration triggers the index to search + # from all impulse responses. + success, record = self.audio_index.sample_audio( + -1.0, rng=self.rng, distr=tag_distr) + if success is True: + _, read_size, ir_fname = record + ir_wav = SpeechSegment.from_file(ir_fname) + audio_segment.convolve(ir_wav, allow_resampling=True) diff --git a/data_utils/augmentor/noise_speech.py b/data_utils/augmentor/noise_speech.py new file mode 100755 index 000000000..8cf7c27b6 --- /dev/null +++ b/data_utils/augmentor/noise_speech.py @@ -0,0 +1,318 @@ +""" noise speech +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import logging +import numpy as np +import os +from collections import defaultdict + +from . import base +from . import audio_database +from data_utils.speech import SpeechSegment + +TURK = "turk" +USE_AUDIO_DATABASE_SOURCES = frozenset(["freesound", "chime"]) +HALF_NOISE_LENGTH_MIN_THRESHOLD = 3.0 +FIND_NOISE_MAX_ATTEMPTS = 20 + +logger = logging.getLogger(__name__) + + +def get_first_smaller(items, value): + index = bisect.bisect_left(items, value) - 1 + assert items[index] < value, \ + 'get_first_smaller failed! %d %d' % (items[index], value) + return items[index] + + +def get_first_larger(items, value): + 'Find leftmost value greater than value' + index = bisect.bisect_right(items, value) + assert index < len(items), \ + "no noise bin exists for this audio length (%f)" % value + assert items[index] > value, \ + 'get_first_larger failed! %d %d' % (items[index], value) + return items[index] + + +def _get_turk_noise_files(noise_dir, index_file): + """ Creates a map from duration => a list of noise filenames + + :param noise_dir: Directory of noise files which contains + "noise-samples-list" + :type noise_dir: basestring + :param index_file: Noise list + :type index_file: basestring + + returns:noise_files (defaultdict): A map of bins to noise files. + Each key is the duration, and the value is a list of noise + files binned to this duration. Each bin is 2 secs. 
+ + Note: noise-samples-list should contain one line per noise (wav) file + along with its duration in milliseconds + """ + noise_files = defaultdict(list) + if not os.path.exists(index_file): + logger.error('No noise files were found at {}'.format(index_file)) + return noise_files + num_noise_files = 0 + rounded_durations = list(range(0, 65, 2)) + with open(index_file, 'r') as fl: + for line in fl: + fname = os.path.join(noise_dir, line.strip().split()[0]) + duration = float(line.strip().split()[1]) / 1000 + # bin the noise files into length bins rounded by 2 sec + bin_id = get_first_smaller(rounded_durations, duration) + noise_files[bin_id].append(fname) + num_noise_files += 1 + logger.info('Loaded {} turk noise files'.format(num_noise_files)) + return noise_files + + +class NoiseSpeechAugmentor(base.AugmentorBase): + """ Noise addition block + + :param snr_min: minimum signal-to-noise ratio + :type snr_min: float + :param snr_max: maximum signal-to-noise ratio + :type snr_max: float + :param noise_dir: root of where noise files are stored + :type noise_fir: basestring + :param index_file: index of noises of interest in noise_dir + :type index_file: basestring + :param source: select one from + - turk + - freesound + - chime + Note that this field is no longer required for the freesound + and chime + :type source: string + :param tags: optional parameter for specifying what + particular noises we want to add. See above for the available tags. + :type tags: list + :param tag_distr: optional noise distribution + :type tag_distr: dict + """ + + def __init__(self, + rng, + snr_min, + snr_max, + noise_dir, + source, + allow_downsampling=None, + index_file=None, + tags=None, + tag_distr=None): + # Define all required parameter maps here. + self.rng = rng + self.snr_min = snr_min + self.snr_max = snr_max + self.noise_dir = noise_dir + self.source = source + + self.allow_downsampling = allow_downsampling + self.index_file = index_file + self.tags = tags + self.tag_distr = tag_distr + + # When new noise sources are added, make sure to define the + # associated bookkeeping variables here. + self.turk_noise_files = [] + self.turk_noise_dir = None + self.audio_index = audio_database.AudioIndex() + + def _init_data(self): + """ Preloads stuff from disk in an attempt (e.g. list of files, etc) + to make later loading faster. If the data configuration remains the + same, this function does nothing. 
+ + """ + noise_dir = self.noise_dir + index_file = self.index_file + source = self.source + if not index_file: + if source == TURK: + index_file = os.path.join(noise_dir, 'noise-samples-list') + logger.debug("index_file not provided; " + "defaulting to " + + index_file) + else: + if source != "": + assert source in USE_AUDIO_DATABASE_SOURCES, \ + "{} not supported by audio_database".format(source) + index_file = os.path.join(noise_dir, + "audio_index_commercial.txt") + logger.debug("index_file not provided; " + "defaulting to " + + index_file) + + if source == TURK: + if self.turk_noise_dir != noise_dir: + self.turk_noise_dir = noise_dir + self.turk_noise_files = _get_turk_noise_files(noise_dir, + index_file) + # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: + else: + if source != "": + assert source in USE_AUDIO_DATABASE_SOURCES, \ + "{} not supported by audio_database".format(source) + self.audio_index.refresh_records_from_index_file( + self.noise_dir, index_file, self.tags) + + def transform_audio(self, audio_segment): + """Adds walla noise + + :param audio_segment: Input audio + :type audio_segment: SpeechSegment + """ + # This handles the cases where the data source or directories change. + self._init_data + source = self.source + allow_downsampling = self.allow_downsampling + if source == TURK: + self._add_turk_noise(audio_segment, self.rng, allow_downsampling) + # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: + else: + self._add_noise(audio_segment, self.rng, allow_downsampling) + + def _sample_snr(self): + """ Returns a float sampled in [`self.snr_min`, `self.snr_max`] + if both `self.snr_min` and `self.snr_max` are non-zero. + """ + snr_min = self.snr_min + snr_max = self.snr_max + sampled_snr = self.rng.uniform(snr_min, snr_max) + return sampled_snr + + def _add_turk_noise(self, audio_segment, allow_downsampling): + """ Adds a turk noise to the input audio. + + :param audio_segment: input audio + :type audio_segment: audiosegment + :param allow_downsampling: indicates whether downsampling + is allowed + :type allow_downsampling: boolean + """ + read_size = 0 + if len(self.turk_noise_files) > 0: + snr = self._sample_snr(self.rng) + # Draw the noise file randomly from noise files that are + # slightly longer than the utterance + noise_bins = sorted(self.turk_noise_files.keys()) + # note some bins can be empty, so we can't just round up + # to the nearest 2-sec interval + rounded_duration = get_first_larger(noise_bins, + audio_segment.duration) + noise_fname = \ + self.rng.sample(self.turk_noise_files[rounded_duration], 1)[0] + noise = SpeechSegment.from_wav_file(noise_fname) + logger.debug('noise_fname {}'.format(noise_fname)) + logger.debug('snr {}'.format(snr)) + read_size = len(noise) * 2 + # May throw exceptions, but this is caught by + # AudioFeaturizer.get_audio_files. + audio_segment.add_noise( + noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) + + def _add_noise(self, audio_segment, allow_downsampling): + """ Adds a noise indexed in audio_database.AudioIndex. 
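In `transform_audio` above, `self._init_data` is referenced without being called, and `_sample_snr` and `_add_turk_noise` are invoked with arguments their definitions do not accept. A self-consistent sketch of the same dispatch (method and attribute names follow the class above, but the behaviour is assumed, not taken from a fixed upstream version):

```
TURK = "turk"

class _NoiseDispatchSketch(object):
    # Only the pieces relevant to the dispatch are shown.
    def transform_audio(self, audio_segment):
        self._init_data()                       # note the call parentheses
        if self.source == TURK:
            self._add_turk_noise(audio_segment, self.allow_downsampling)
        else:
            self._add_noise(audio_segment, self.allow_downsampling)

    def _sample_snr(self):
        # Uniform draw from [snr_min, snr_max]; no extra rng argument is needed
        # because the augmentor already holds self.rng.
        return self.rng.uniform(self.snr_min, self.snr_max)
```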
+ + :param audio_segment: input audio + :type audio_segment: SpeechSegment + :param allow_downsampling: indicates whether downsampling + is allowed + :type allow_downsampling: boolean + + Returns: + (SpeechSegment, int) + - sound with turk noise added + - number of bytes read from disk + """ + read_size = 0 + tag_distr = self.tag_distr + if not self.audio_index.has_audio(tag_distr): + if tag_distr is None: + if not self.tags: + raise RuntimeError("The noise index does not have audio " + "files to sample from.") + else: + raise RuntimeError("The noise index does not have audio " + "files of the given tags to sample " + "from.") + else: + raise RuntimeError("The noise index does not have audio " + "files to match the target noise " + "distribution.") + else: + # Compute audio segment related statistics + audio_duration = audio_segment.duration + + # Sample relevant augmentation parameters. + snr = self._sample_snr(self.rng) + + # Perhaps, we may not have a sufficiently long noise, so we need + # to search iteratively. + min_duration = audio_duration + 0.25 + for _ in range(FIND_NOISE_MAX_ATTEMPTS): + logger.debug("attempting to find noise of length " + "at least {}".format(min_duration)) + + success, record = \ + self.audio_index.sample_audio(min_duration, + rng=self.rng, + distr=tag_distr) + + if success is True: + noise_duration, read_size, noise_fname = record + + # Assert after logging so we know + # what caused augmentation to fail. + logger.debug("noise_fname {}".format(noise_fname)) + logger.debug("snr {}".format(snr)) + assert noise_duration >= min_duration + break + + # Decrease the desired minimum duration linearly. + # If the value becomes smaller than some threshold, + # we half the value instead. + if min_duration > HALF_NOISE_LENGTH_MIN_THRESHOLD: + min_duration -= 2.0 + else: + min_duration *= 0.5 + + if success is False: + logger.info("Failed to find a noise file") + return + + diff_duration = audio_duration + 0.25 - noise_duration + if diff_duration >= 0.0: + # Here, the noise is shorter than the audio file, so + # we pad with zeros to make sure the noise sound is applied + # with a uniformly random shift. + noise = SpeechSegment.from_file(noise_fname) + noise = noise.pad_silence(diff_duration, sides="both") + else: + # The noise clip is at least ~25 ms longer than the audio + # segment here. + diff_duration = int(noise_duration * audio_segment.sample_rate) - \ + int(audio_duration * audio_segment.sample_rate) - \ + int(0.02 * audio_segment.sample_rate) + start = float(self.rng.randint(0, diff_duration)) / \ + audio.sample_rate + finish = min(start + audio_duration + 0.2, noise_duration) + noise = SpeechSegment.slice_from_file(noise_fname, start, + finish) + + if len(noise) < len(audio_segment): + # This is to ensure that the noise clip is at least as + # long as the audio segment. + num_samples_to_pad = len(audio_segment) - len(noise) + # Padding this amount of silence on both ends ensures that + # the placement of the noise clip is uniformly random. 
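`_add_noise` above searches the index repeatedly, relaxing the requested minimum noise duration between attempts: linear 2-second steps first, then halving once the value drops below `HALF_NOISE_LENGTH_MIN_THRESHOLD`. A small self-contained sketch of that back-off schedule (the audio duration used below is illustrative):

```
HALF_NOISE_LENGTH_MIN_THRESHOLD = 3.0
FIND_NOISE_MAX_ATTEMPTS = 20

def duration_backoff(audio_duration):
    """Yield the successively smaller minimum noise durations to request."""
    min_duration = audio_duration + 0.25
    for _ in range(FIND_NOISE_MAX_ATTEMPTS):
        yield min_duration
        if min_duration > HALF_NOISE_LENGTH_MIN_THRESHOLD:
            min_duration -= 2.0
        else:
            min_duration *= 0.5

print([round(d, 4) for d in duration_backoff(8.0)][:6])
# [8.25, 6.25, 4.25, 2.25, 1.125, 0.5625]
```

Note also that in the slicing branch above, `start` is divided by `audio.sample_rate`, but no `audio` name exists in that method; from context, `audio_segment.sample_rate` appears to be intended.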
+ silence = SpeechSegment( + np.zeros(num_samples_to_pad), audio_segment.sample_rate) + noise = SpeechSegment.concatenate(silence, noise, silence) + + audio_segment.add_noise( + noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py new file mode 100755 index 000000000..bc2d6c1b6 --- /dev/null +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -0,0 +1,57 @@ +""" Online bayesian normalization +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from . import base + + +class OnlineBayesianNormalizationAugmentor(base.AugmentorBase): + """ + Instantiates an online bayesian normalization module. + :param target_db: Target RMS value in decibels + :type target_db: func[int->scalar] + :param prior_db: Prior RMS estimate in decibels + :type prior_db: func[int->scalar] + :param prior_samples: Prior strength in number of samples + :type prior_samples: func[int->scalar] + :param startup_delay: Start-up delay in seconds during + which normalization statistics is accrued. + :type starup_delay: func[int->scalar] + """ + + def __init__(self, + rng, + target_db, + prior_db, + prior_samples, + startup_delay=base.parse_parameter_from(0.0)): + + self.target_db = target_db + self.prior_db = prior_db + self.prior_samples = prior_samples + self.startup_delay = startup_delay + self.rng = rng + + def transform_audio(self, audio_segment): + """ + Normalizes the input audio using the online Bayesian approach. + + :param audio_segment: input audio + :type audio_segment: SpeechSegment + :param iteration: current iteration + :type iteration: int + :param text: audio transcription + :type text: basestring + :param rng: RNG to use for augmentation + :type rng: random.Random + + """ + read_size = 0 + target_db = self.target_db(iteration) + prior_db = self.prior_db(iteration) + prior_samples = self.prior_samples(iteration) + startup_delay = self.startup_delay(iteration) + audio.normalize_online_bayesian( + target_db, prior_db, prior_samples, startup_delay=startup_delay) diff --git a/data_utils/augmentor/resampler.py b/data_utils/augmentor/resampler.py new file mode 100755 index 000000000..1b959be56 --- /dev/null +++ b/data_utils/augmentor/resampler.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from . import base + + +class ResamplerAugmentor(base.AugmentorBase): + """ Instantiates a resampler module. + + :param new_sample_rate: New sample rate in Hz + :type new_sample_rate: func[int->scalar] + :param rng: Random generator object. + :type rng: random.Random + """ + + def __init__(self, rng, new_sample_rate): + self.new_sample_rate = new_sample_rate + self._rng = rng + + def transform_audio(self, audio_segment): + """ Resamples the input audio to the target sample rate. + + Note that this is an in-place transformation. 
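In `OnlineBayesianNormalizationAugmentor.transform_audio` above, `iteration` and `audio` are used but never defined in the method. A self-consistent sketch, assuming the func[int->scalar] parameters are evaluated against an iteration counter kept on the augmentor (that counter is an assumption, not part of the code shown):

```
class _OnlineBayesianNormalizationSketch(object):
    def __init__(self, rng, target_db, prior_db, prior_samples, startup_delay):
        self.rng = rng
        self.target_db = target_db
        self.prior_db = prior_db
        self.prior_samples = prior_samples
        self.startup_delay = startup_delay
        self.iteration = 0                      # assumed per-augmentor counter

    def transform_audio(self, audio_segment):
        self.iteration += 1
        audio_segment.normalize_online_bayesian(
            self.target_db(self.iteration),
            self.prior_db(self.iteration),
            self.prior_samples(self.iteration),
            startup_delay=self.startup_delay(self.iteration))
```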
+ + :param audio: input audio + :type audio: SpeechDLSegment + """ + new_sample_rate = self.new_sample_rate + audio.resample(new_sample_rate) \ No newline at end of file diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py new file mode 100755 index 000000000..e09be5f74 --- /dev/null +++ b/data_utils/augmentor/speed_perturb.py @@ -0,0 +1,53 @@ +"""Speed perturbation module for making ASR robust to different voice +types (high pitched, low pitched, etc) +Samples uniformly between speed_min and speed_max +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from . import base + + +class SpeedPerturbatioAugmentor(base.AugmentorBase): + """ + Instantiates a speed perturbation module. + + See reference paper here: + + http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf + + :param speed_min: Lower bound on new rate to sample + :type speed_min: func[int->scalar] + :param speed_max: Upper bound on new rate to sample + :type speed_max: func[int->scalar] + """ + + def __init__(self, rng, speed_min, speed_max): + + if (speed_min < 0.9): + raise ValueError( + "Sampling speed below 0.9 can cause unnatural effects") + if (speed_min > 1.1): + raise ValueError( + "Sampling speed above 1.1 can cause unnatural effects") + self.speed_min = speed_min + self.speed_max = speed_max + self.rng = rng + + def transform_audio(self, audio_segment): + """ + Samples a new speed rate from the given range and + changes the speed of the given audio clip. + + Note that this is an in-place transformation. + + :param audio_segment: input audio + :type audio_segment: SpeechDLSegment + """ + read_size = 0 + speed_min = self.speed_min(iteration) + speed_max = self.speed_max(iteration) + sampled_speed = rng.uniform(speed_min, speed_max) + audio = audio.change_speed(sampled_speed) diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py index a5a9f6cad..15055b915 100755 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -3,10 +3,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from data_utils.augmentor.base import AugmentorBase +from . import base -class VolumePerturbAugmentor(AugmentorBase): +class VolumePerturbAugmentor(base.AugmentorBase): """Augmentation model for adding random volume perturbation. This is used for multi-loudness training of PCEN. See From ed5f04afb86e7285cdd2d9d36dbf4b63431b5968 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 15 Jun 2017 17:05:00 +0800 Subject: [PATCH 040/335] Add shuffle type of instance_shuffle and batch_shuffle_clipped. --- data_utils/data.py | 50 ++++++++++++++++++++++------- datasets/librispeech/librispeech.py | 3 +- decoder.py | 6 ++-- infer.py | 11 +++---- train.py | 16 ++++++--- utils.py | 25 +++++++++++++++ 6 files changed, 82 insertions(+), 29 deletions(-) create mode 100644 utils.py diff --git a/data_utils/data.py b/data_utils/data.py index 48e03fe85..424343a48 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -80,7 +80,7 @@ class DataGenerator(object): padding_to=-1, flatten=False, sortagrad=False, - batch_shuffle=False): + shuffle_method="batch_shuffle"): """ Batch data reader creator for audio data. Return a callable generator function to produce batches of data. 
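The resampler and speed-perturbation transforms above share the same pattern of issues: `ResamplerAugmentor.transform_audio` operates on an undefined `audio` name, and `SpeedPerturbatioAugmentor` (the class name is missing an "n") checks `speed_min` twice where the second check presumably targets `speed_max`, then references `iteration`, `rng`, and `audio` that do not exist in its scope. A self-consistent sketch of the speed perturbation, treating the bounds as plain floats and assuming the segment exposes an in-place `change_speed()` method as the call above suggests:

```
class _SpeedPerturbSketch(object):
    def __init__(self, rng, speed_min, speed_max):
        if speed_min < 0.9:
            raise ValueError("Sampling speed below 0.9 can cause unnatural effects")
        if speed_max > 1.1:   # check the upper bound, not speed_min again
            raise ValueError("Sampling speed above 1.1 can cause unnatural effects")
        self.rng = rng
        self.speed_min = speed_min
        self.speed_max = speed_max

    def transform_audio(self, audio_segment):
        # Sample a rate from [speed_min, speed_max] and apply it in place.
        sampled_speed = self.rng.uniform(self.speed_min, self.speed_max)
        audio_segment.change_speed(sampled_speed)
```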
@@ -104,12 +104,22 @@ class DataGenerator(object): :param sortagrad: If set True, sort the instances by audio duration in the first epoch for speed up training. :type sortagrad: bool - :param batch_shuffle: If set True, instances are batch-wise shuffled. - For more details, please see - ``_batch_shuffle.__doc__``. - If sortagrad is True, batch_shuffle is disabled + :param shuffle_method: Shuffle method. Options: + '' or None: no shuffle. + 'instance_shuffle': instance-wise shuffle. + 'batch_shuffle': similarly-sized instances are + put into batches, and then + batch-wise shuffle the batches. + For more details, please see + ``_batch_shuffle.__doc__``. + 'batch_shuffle_clipped': 'batch_shuffle' with + head shift and tail + clipping. For more + details, please see + ``_batch_shuffle``. + If sortagrad is True, shuffle is disabled for the first epoch. - :type batch_shuffle: bool + :type shuffle_method: None|str :return: Batch reader function, producing batches of data when called. :rtype: callable """ @@ -123,8 +133,20 @@ class DataGenerator(object): # sort (by duration) or batch-wise shuffle the manifest if self._epoch == 0 and sortagrad: manifest.sort(key=lambda x: x["duration"]) - elif batch_shuffle: - manifest = self._batch_shuffle(manifest, batch_size) + else: + if shuffle_method == "batch_shuffle": + manifest = self._batch_shuffle( + manifest, batch_size, clipped=False) + elif shuffle_method == "batch_shuffle_clipped": + manifest = self._batch_shuffle( + manifest, batch_size, clipped=True) + elif shuffle_method == "instance_shuffle": + self._rng.shuffle(manifest) + elif not shuffle_method: + pass + else: + raise ValueError("Unknown shuffle method %s." % + shuffle_method) # prepare batches instance_reader = self._instance_reader_creator(manifest) batch = [] @@ -218,7 +240,7 @@ class DataGenerator(object): new_batch.append((padded_audio, text)) return new_batch - def _batch_shuffle(self, manifest, batch_size): + def _batch_shuffle(self, manifest, batch_size, clipped=False): """Put similarly-sized instances into minibatches for better efficiency and make a batch-wise shuffle. @@ -233,6 +255,9 @@ class DataGenerator(object): :param batch_size: Batch size. This size is also used for generate a random number for batch shuffle. :type batch_size: int + :param clipped: Whether to clip the heading (small shift) and trailing + (incomplete batch) instances. + :type clipped: bool :return: Batch shuffled mainifest. 
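The batch-wise shuffle described above and implemented just below sorts by duration, shifts the manifest by a random head offset, groups it into batches, shuffles whole batches, and (unless clipped) re-appends the leftover tail and the shifted head. A self-contained sketch of the same idea on a plain list, with integers standing in for manifest entries:

```
import random

def batch_shuffle(manifest, batch_size, clipped=False, rng=None):
    """Illustrative batch-wise shuffle on a plain list."""
    rng = rng or random.Random()
    manifest = sorted(manifest)                 # stands in for the sort-by-duration step
    shift_len = rng.randint(0, batch_size - 1)  # random head offset
    batches = list(zip(*[iter(manifest[shift_len:])] * batch_size))
    rng.shuffle(batches)                        # shuffle whole batches, not instances
    shuffled = [x for batch in batches for x in batch]
    if not clipped:
        res_len = len(manifest) - shift_len - len(shuffled)
        shuffled.extend(manifest[len(manifest) - res_len:])  # leftover incomplete tail
        shuffled.extend(manifest[:shift_len])                # shifted head
    return shuffled

print(batch_shuffle(list(range(10)), batch_size=4))
```

The explicit start index in the tail slice sidesteps the corner case where the leftover length is zero and a `manifest[-res_len:]` slice would return the whole list.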
:rtype: list """ @@ -241,7 +266,8 @@ class DataGenerator(object): batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) self._rng.shuffle(batch_manifest) batch_manifest = list(sum(batch_manifest, ())) - res_len = len(manifest) - shift_len - len(batch_manifest) - batch_manifest.extend(manifest[-res_len:]) - batch_manifest.extend(manifest[0:shift_len]) + if not clipped: + res_len = len(manifest) - shift_len - len(batch_manifest) + batch_manifest.extend(manifest[-res_len:]) + batch_manifest.extend(manifest[0:shift_len]) return batch_manifest diff --git a/datasets/librispeech/librispeech.py b/datasets/librispeech/librispeech.py index faf038cc1..87e52ae4a 100644 --- a/datasets/librispeech/librispeech.py +++ b/datasets/librispeech/librispeech.py @@ -37,8 +37,7 @@ MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522" MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa" MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708" -parser = argparse.ArgumentParser( - description='Downloads and prepare LibriSpeech dataset.') +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--target_dir", default=DATA_HOME + "/Libri", diff --git a/decoder.py b/decoder.py index 8314885ce..77d950b8d 100644 --- a/decoder.py +++ b/decoder.py @@ -8,8 +8,7 @@ from itertools import groupby def ctc_best_path_decode(probs_seq, vocabulary): - """ - Best path decoding, also called argmax decoding or greedy decoding. + """Best path decoding, also called argmax decoding or greedy decoding. Path consisting of the most probable tokens are further post-processed to remove consecutive repetitions and all blanks. @@ -38,8 +37,7 @@ def ctc_best_path_decode(probs_seq, vocabulary): def ctc_decode(probs_seq, vocabulary, method): - """ - CTC-like sequence decoding from a sequence of likelihood probablilites. + """CTC-like sequence decoding from a sequence of likelihood probablilites. :param probs_seq: 2-D list of probabilities over the vocabulary for each character. Each element is a list of float probabilities diff --git a/infer.py b/infer.py index f7c99df11..06449ab05 100644 --- a/infer.py +++ b/infer.py @@ -10,9 +10,9 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from decoder import ctc_decode +import utils -parser = argparse.ArgumentParser( - description='Simplified version of DeepSpeech2 inference.') +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", default=10, @@ -62,9 +62,7 @@ args = parser.parse_args() def infer(): - """ - Max-ctc-decoding for DeepSpeech2. - """ + """Max-ctc-decoding for DeepSpeech2.""" # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, @@ -98,7 +96,7 @@ def infer(): manifest_path=args.decode_manifest_path, batch_size=args.num_samples, sortagrad=False, - batch_shuffle=False) + shuffle_method=None) infer_data = batch_reader().next() # run inference @@ -123,6 +121,7 @@ def infer(): def main(): + utils.print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=1) infer() diff --git a/train.py b/train.py index 6074aa358..c60a039b6 100644 --- a/train.py +++ b/train.py @@ -12,6 +12,7 @@ import distutils.util import paddle.v2 as paddle from model import deep_speech2 from data_utils.data import DataGenerator +import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -51,6 +52,12 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use sortagrad or not. 
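`ctc_best_path_decode` above (in decoder.py) keeps the most probable token at every frame, collapses consecutive repeats, and removes blanks. A minimal NumPy sketch of that rule; placing the blank at the last vocabulary index and the toy probabilities are assumptions of the sketch, not taken from the repository's decoder:

```
import numpy as np
from itertools import groupby

def best_path_decode(probs_seq, vocabulary):
    """Greedy (argmax) CTC decoding; blank assumed to be the last index."""
    blank_index = len(vocabulary)
    # most probable token at each time step
    best_path = np.argmax(np.asarray(probs_seq), axis=1)
    # collapse consecutive repeats, then drop blanks
    collapsed = [k for k, _ in groupby(best_path)]
    return ''.join(vocabulary[i] for i in collapsed if i != blank_index)

vocab = ['a', 'b', 'c']                  # index 3 acts as the blank
probs = [[0.1, 0.6, 0.1, 0.2],           # b
         [0.1, 0.6, 0.1, 0.2],           # b (repeat, collapsed)
         [0.1, 0.1, 0.1, 0.7],           # blank
         [0.7, 0.1, 0.1, 0.1]]           # a
print(best_path_decode(probs, vocab))    # -> "ba"
```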
(default: %(default)s)") +parser.add_argument( + "--shuffle_method", + default='instance_shuffle', + type=str, + help="Shuffle method: 'instance_shuffle', 'batch_shuffle', " + "'batch_shuffle_batch'. (default: %(default)s)") parser.add_argument( "--trainer_count", default=4, @@ -93,9 +100,7 @@ args = parser.parse_args() def train(): - """ - DeepSpeech2 training. - """ + """DeepSpeech2 training.""" # initialize data generator def data_generator(): @@ -145,13 +150,13 @@ def train(): batch_size=args.batch_size, min_batch_size=args.trainer_count, sortagrad=args.use_sortagrad if args.init_model_path is None else False, - batch_shuffle=True) + shuffle_method=args.shuffle_method) test_batch_reader = test_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, min_batch_size=1, # must be 1, but will have errors. sortagrad=False, - batch_shuffle=False) + shuffle_method=None) # create event handler def event_handler(event): @@ -186,6 +191,7 @@ def train(): def main(): + utils.print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) train() diff --git a/utils.py b/utils.py new file mode 100644 index 000000000..9ca363c8f --- /dev/null +++ b/utils.py @@ -0,0 +1,25 @@ +"""Contains common utility functions.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +def print_arguments(args): + """Print argparse's arguments. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + parser.add_argument("name", default="Jonh", type=str, help="User name.") + args = parser.parse_args() + print_arguments(args) + + :param args: Input argparse.Namespace for printing. + :type args: argparse.Namespace + """ + print("----- Configuration Arguments -----") + for arg, value in vars(args).iteritems(): + print("%s: %s" % (arg, value)) + print("------------------------------------") From d66d740ea0f6002e2fb48f2b5a304f76205b2fdb Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Fri, 16 Jun 2017 15:17:43 +0800 Subject: [PATCH 041/335] add audio part --- data_utils/audio.py | 457 ++++++++---------- data_utils/augmentor/audio_database.py | 401 --------------- data_utils/augmentor/augmentation.py | 15 - data_utils/augmentor/implus_response.py | 76 --- data_utils/augmentor/noise_speech.py | 318 ------------ .../online_bayesian_normalization.py | 57 --- data_utils/augmentor/resampler.py | 30 -- data_utils/augmentor/speed_perturb.py | 53 -- data_utils/augmentor/volume_perturb.py | 4 +- requirements.txt | 2 + 10 files changed, 215 insertions(+), 1198 deletions(-) delete mode 100755 data_utils/augmentor/audio_database.py delete mode 100755 data_utils/augmentor/implus_response.py delete mode 100755 data_utils/augmentor/noise_speech.py delete mode 100755 data_utils/augmentor/online_bayesian_normalization.py delete mode 100755 data_utils/augmentor/resampler.py delete mode 100755 data_utils/augmentor/speed_perturb.py diff --git a/data_utils/audio.py b/data_utils/audio.py index aef13c30f..ee4e6d84e 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -8,6 +8,7 @@ import io import soundfile import scikits.samplerate from scipy import signal +import random class AudioSegment(object): @@ -46,6 +47,32 @@ class AudioSegment(object): """Return whether two objects are unequal.""" return not self.__eq__(other) + def __len__(self): + """Returns length of segment in samples.""" + return self.num_samples + + def __add__(self, other): + """Add samples from another 
segment to those of this segment and return + a new segment (sample-wise addition, not segment concatenation). + + :param other: Segment containing samples to be + added in. + :type other: AudioSegment + :return: New segment containing resulting samples. + :rtype: AudioSegment + :raise TypeError: If sample rates of segments don't match, + or if length of segments don't match. + """ + if type(self) != type(other): + raise TypeError("Cannot add segment of different type: {}" + .format(type(other))) + if self._sample_rate != other._sample_rate: + raise TypeError("Sample rates must match to add segments.") + if len(self._samples) != len(other._samples): + raise TypeError("Segment lengths must match to add segments.") + samples = self.samples + other.samples + return type(self)(samples, sample_rate=self._sample_rate) + def __str__(self): """Return human-readable representation of segment.""" return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " @@ -64,69 +91,6 @@ class AudioSegment(object): samples, sample_rate = soundfile.read(file, dtype='float32') return cls(samples, sample_rate) - @classmethod - def slice_from_file(cls, fname, start=None, end=None): - """ - Loads a small section of an audio without having to load - the entire file into the memory which can be incredibly wasteful. - - :param fname: input audio file name - :type fname: bsaestring - :param start: start time in seconds (supported granularity is ms) - If start is negative, it wraps around from the end. If not - provided, this function reads from the very beginning. - :type start: float - :param end: start time in seconds (supported granularity is ms) - If end is negative, it wraps around from the end. If not - provided, the default behvaior is to read to the end of the - file. - :type end: float - - :return:the specified slice of input audio in the audio.AudioSegment - format. - """ - sndfile = soundfile.SoundFile(fname) - - sample_rate = sndfile.samplerate - if sndfile.channels != 1: - raise TypeError("{} has more than 1 channel.".format(fname)) - - duration = float(len(sndfile)) / sample_rate - - if start is None: - start = 0.0 - if end is None: - end = duration - - if start < 0.0: - start += duration - if end < 0.0: - end += duration - - if start < 0.0: - raise IndexError("The slice start position ({} s) is out of " - "bounds. Filename: {}".format(start, fname)) - if end < 0.0: - raise IndexError("The slice end position ({} s) is out of bounds " - "Filename: {}".format(end, fname)) - - if start > end: - raise IndexError("The slice start position ({} s) is later than " - "the slice end position ({} s)." - .format(start, end)) - - if end > duration: - raise ValueError("The slice end time ({} s) is out of " - "bounds (> {} s) Filename: {}" - .format(end, duration, fname)) - - start_frame = int(start * sample_rate) - end_frame = int(end * sample_rate) - sndfile.seek(start_frame) - data = sndfile.read(frames=end_frame - start_frame, dtype='float32') - - return cls(data, sample_rate) - @classmethod def from_bytes(cls, bytes): """Create audio segment from a byte string containing audio samples. @@ -140,43 +104,30 @@ class AudioSegment(object): io.BytesIO(bytes), dtype='float32') return cls(samples, sample_rate) - @classmethod - def make_silence(cls, duration, sample_rate): - """Creates a silent audio segment of the given duration and - sample rate. 
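A short usage sketch of the new `__len__` and `__add__` operators defined above, mixing two equal-length segments sample-wise. It assumes the constructor accepts a float sample array and a sample rate, as the factory methods above do; the signals themselves are illustrative:

```
import numpy as np
from data_utils.audio import AudioSegment

sr = 16000
tone = AudioSegment(0.1 * np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr), sr)
hiss = AudioSegment(0.01 * np.random.randn(sr), sr)

mixed = tone + hiss          # sample-wise addition; returns a new segment
assert len(mixed) == len(tone) == sr
print(mixed)                 # __str__: num_samples, sample_rate, duration, rms
```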
- - :param duration: length of silence in seconds - :type duration: scalar - :param sample_rate: sample rate - :type sample_rate: scalar - :returns: silence of the given duration - :rtype: AudioSegment - """ - samples = np.zeros(int(float(duration) * sample_rate)) - return cls(samples, sample_rate) - - @classmethod - def concatenate(cls, *segments): + def concatenate(self, *segments): """Concatenate an arbitrary number of audio segments together. - :param *segments: input audio segments - :type *segments: [AudioSegment] + :param *segments: Input audio segments + :type *segments: AudioSegment + :return: Audio segment instance. + :rtype: AudioSegment + :raises ValueError: If number of segments is zero, or if sample_rate + not match between two audio segments + :raises TypeError: If item of segments is not Audiosegment instance """ # Perform basic sanity-checks. - N = len(segments) - if N == 0: + if len(segments) == 0: raise ValueError("No audio segments are given to concatenate.") sample_rate = segments[0]._sample_rate - for segment in segments: - if sample_rate != segment._sample_rate: + for seg in segments: + if sample_rate != seg._sample_rate: raise ValueError("Can't concatenate segments with " "different sample rates") - if type(segment) is not cls: + if type(seg) is not type(self): raise TypeError("Only audio segments of the same type " "instance can be concatenated.") - samples = np.concatenate([seg.samples for seg in segments]) - return cls(samples, sample_rate) + return type(self)(samples, sample_rate) def to_wav_file(self, filepath, dtype='float32'): """Save audio segment to disk as wav file. @@ -203,6 +154,65 @@ class AudioSegment(object): format='WAV', subtype=subtype_map[dtype]) + def slice_from_file(self, file, start=None, end=None): + """Loads a small section of an audio without having to load + the entire file into the memory which can be incredibly wasteful. + + :param file: Input audio filepath + :type file: basestring + :param start: Start time in seconds. If start is negative, it wraps + around from the end. If not provided, this function + reads from the very beginning. + :type start: float + :param end: End time in seconds. If end is negative, it wraps around + from the end. If not provided, the default behvaior is + to read to the end of the file. + :type end: float + :return: The specified slice of input audio in the audio.AudioSegment format. + :rtype: AudioSegment + :rainse ValueError: If the position is error, or if the time is out bounds. + """ + sndfile = soundfile.SoundFile(file) + sample_rate = sndfile.samplerate + duration = float(len(sndfile)) / sample_rate + start = 0. if start is None else start + end = 0. if end is None else end + if start < 0.0: + start += duration + if end < 0.0: + end += duration + if start < 0.0: + raise ValueError("The slice start position (%f s) is out of " + "bounds. Filename: %s" % (start, file)) + if end < 0.0: + raise ValueError("The slice end position (%f s) is out of bounds " + "Filename: %s" % (end, file)) + if start > end: + raise ValueError("The slice start position (%f s) is later than " + "the slice end position (%f s)." 
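In the instance-method version of `concatenate` above, the diff drops the `samples = np.concatenate(...)` assignment while still returning `type(self)(samples, sample_rate)`, so `samples` is undefined when the method runs. A minimal sketch of the intended body, with the same checks and the concatenation restored:

```
import numpy as np

def concatenate(self, *segments):
    """Concatenate an arbitrary number of audio segments (sketch)."""
    if len(segments) == 0:
        raise ValueError("No audio segments are given to concatenate.")
    sample_rate = segments[0]._sample_rate
    for seg in segments:
        if sample_rate != seg._sample_rate:
            raise ValueError("Can't concatenate segments with "
                             "different sample rates")
        if type(seg) is not type(self):
            raise TypeError("Only audio segments of the same type "
                            "instance can be concatenated.")
    samples = np.concatenate([seg.samples for seg in segments])
    return type(self)(samples, sample_rate)
```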
% (start, end)) + if end > duration: + raise ValueError("The slice end time (%f s) is out of bounds " + "(> %f s) Filename: %s" % (end, duration, file)) + start_frame = int(start * sample_rate) + end_frame = int(end * sample_rate) + sndfile.seek(start_frame) + data = sndfile.read(frames=end_frame - start_frame, dtype='float32') + return type(self)(data, sample_rate) + + def make_silence(self, duration, sample_rate): + """Creates a silent audio segment of the given duration and + sample rate. + + :param duration: Length of silence in seconds + :type duration: float + :param sample_rate: Sample rate + :type sample_rate: float + :return: Silence of the given duration + :rtype: AudioSegment + """ + samples = np.zeros(int(duration * sample_rate)) + return type(self)(samples, sample_rate) + def to_bytes(self, dtype='float32'): """Create a byte string containing the audio content. @@ -247,52 +257,49 @@ class AudioSegment(object): self._samples = np.interp(new_indices, old_indices, self._samples) def normalize(self, target_db=-20, max_gain_db=300.0): - """Normalize audio to desired RMS value in decibels. + """Normalize audio to be desired RMS value in decibels. Note that this is an in-place transformation. - :param target_db: Target RMS value in decibels.This value - should be less than 0.0 as 0.0 is full-scale audio. - :type target_db: float, optional - :param max_gain_db: Max amount of gain in dB that can be applied - for normalization. This is to prevent nans when attempting - to normalize a signal consisting of all zeros. - :type max_gain_db: float, optional - - :raises NormalizationWarning: if the required gain to normalize the - segment to the target_db value exceeds max_gain_db. + :param target_db: Target RMS value in decibels. This value should + be less than 0.0 as 0.0 is full-scale audio. + :type target_db: float + :param max_gain_db: Max amount of gain in dB that can be applied for + normalization. This is to prevent nans when attempting + to normalize a signal consisting of all zeros. + :type max_gain_db: float + :raises ValueError: If the required gain to normalize the segment to + the target_db value exceeds max_gain_db. """ gain = target_db - self.rms_db if gain > max_gain_db: raise ValueError( - "Unable to normalize segment to {} dB because it has an RMS " - "value of {} dB and the difference exceeds max_gain_db ({} dB)" - .format(target_db, self.rms_db, max_gain_db)) - gain = min(max_gain_db, target_db - self.rms_db) - self.apply_gain(gain) + "Unable to normalize segment to %f dB because it has an RMS " + "value of %f dB and the difference exceeds max_gain_db (%f dB)" + % (target_db, self.rms_db, max_gain_db)) + self.apply_gain(min(max_gain_db, target_db - self.rms_db)) def normalize_online_bayesian(self, target_db, prior_db, prior_samples, startup_delay=0.0): - """ - Normalize audio using a production-compatible online/causal algorithm. - This uses an exponential likelihood and gamma prior to make - online estimates of the RMS even when there are very few samples. + """Normalize audio using a production-compatible online/causal algorithm. + This uses an exponential likelihood and gamma prior to make online estimates + of the RMS even when there are very few samples. Note that this is an in-place transformation. 
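`normalize` above applies a flat gain of `target_db - rms_db`, capped at `max_gain_db`. A small NumPy sketch of that arithmetic on a raw sample array (the signal below is illustrative and the dB math is standard, not copied from `apply_gain`):

```
import numpy as np

def rms_db(samples):
    """Root-mean-square power of the signal, in decibels."""
    return 10 * np.log10(np.mean(samples ** 2))

samples = 0.1 * np.sin(2 * np.pi * np.arange(16000) / 16000.0 * 440.0)
gain_db = -20.0 - rms_db(samples)            # target_db - rms_db
normalized = samples * (10.0 ** (gain_db / 20.0))
print(round(rms_db(normalized), 2))          # ~ -20.0
```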
:param target_db: Target RMS value in decibels - :type target_bd: scalar + :type target_bd: float :param prior_db: Prior RMS estimate in decibels - :type prior_db: scalar + :type prior_db: float :param prior_samples: Prior strength in number of samples - :type prior_samples: scalar - :param startup_delay: Default: 0.0 s. If provided, this - function will accrue statistics for the first startup_delay - seconds before applying online normalization. - :type startup_delay: scalar + :type prior_samples: float + :param startup_delay: Default 0.0 s. If provided, this function will accrue + statistics for the first startup_delay seconds before + applying online normalization. + :type startup_delay: float """ # Estimate total RMS online startup_sample_idx = min(self.num_samples - 1, @@ -309,88 +316,54 @@ class AudioSegment(object): mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) / (sample_count + prior_samples)) rms_estimate_db = 10 * np.log10(mean_squared_estimate) - # Compute required time-varying gain gain_db = target_db - rms_estimate_db - - # Apply gain to new segment - self.apply_gain(gain_db) - - def normalize_ewma(self, - target_db, - decay_rate, - startup_delay, - rms_eps=1e-6, - max_gain_db=300.0): - startup_sample_idx = min(self.num_samples - 1, - int(self.sample_rate * startup_delay)) - mean_sq = self.samples**2 - if startup_sample_idx > 0: - mean_sq[:startup_sample_idx] = \ - np.sum(mean_sq[:startup_sample_idx]) / startup_sample_idx - idx_start = max(0, startup_sample_idx - 1) - initial_condition = mean_sq[idx_start] * decay_rate - mean_sq[idx_start:] = lfilter( - [1.0 - decay_rate], [1.0, -decay_rate], - mean_sq[idx_start:], - axis=0, - zi=[initial_condition])[0] - rms_estimate_db = 10.0 * np.log10(mean_sq + rms_eps) - gain_db = target_db - rms_estimate_db - if np.any(gain_db > max_gain_db): - warnings.warn( - "Unable to normalize segment to {} dB because it has an RMS " - "value of {} dB and the difference exceeds max_gain_db ({} dB)" - .format(target_db, self.rms_db, max_gain_db), - NormalizationWarning) - gain_db = np.minimum(gain_db, max_gain_db) self.apply_gain(gain_db) def resample(self, target_sample_rate, quality='sinc_medium'): - """Resample audio and return new AudioSegment. - This resamples the audio to a new sample rate and returns a brand - new AudioSegment. The existing AudioSegment is unchanged. + """Resample audio segment. This resamples the audio to a new + sample rate. Note that this is an in-place transformation. - :param new_sample_rate: target sample rate - :type new_sample_rate: scalar + :param target_sample_rate: Target sample rate + :type target_sample_rate: int :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. - Sets resampling speed/quality tradeoff. - See http://www.mega-nerd.com/SRC/api_misc.html#Converters + Sets resampling speed/quality tradeoff. + See http://www.mega-nerd.com/SRC/api_misc.html#Converters :type quality: basestring """ resample_ratio = target_sample_rate / self._sample_rate new_samples = scikits.samplerate.resample( self._samples, r=resample_ratio, type=quality) self._samples = new_samples - self._sample_rate = new_sample_rate + self._sample_rate = target_sample_rate def pad_silence(self, duration, sides='both'): """Pads this audio sample with a period of silence. Note that this is an in-place transformation. 
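The online estimate in `normalize_online_bayesian` above blends the running sum of squares with a gamma-style prior: the prior contributes `prior_samples` pseudo-samples at the mean-square level implied by `prior_db` (taken as `10 ** (prior_db / 10)` in this sketch). A compact NumPy illustration of the per-sample RMS estimate, with the startup delay omitted for brevity:

```
import numpy as np

def online_rms_estimate_db(samples, prior_db, prior_samples):
    """Per-sample RMS estimate (dB) blending observed data with a prior."""
    prior_mean_squared = 10.0 ** (prior_db / 10.0)
    prior_sum_of_squares = prior_mean_squared * prior_samples
    cumsum_of_squares = np.cumsum(samples ** 2)
    sample_count = np.arange(len(samples)) + 1
    mean_squared = ((cumsum_of_squares + prior_sum_of_squares) /
                    (sample_count + prior_samples))
    return 10 * np.log10(mean_squared)

x = 0.05 * np.random.randn(8000)
estimate_db = online_rms_estimate_db(x, prior_db=-20.0, prior_samples=1000.0)
gain_db = -20.0 - estimate_db   # time-varying gain toward target_db
# Early estimates stay near prior_db; later ones drift toward the signal's own RMS.
print(round(float(estimate_db[0]), 2), round(float(estimate_db[-1]), 2))
```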
- :param duration: length of silence in seconds to pad + :param duration: Length of silence in seconds to pad :type duration: float - :param sides: - 'beginning' - adds silence in the beginning - 'end' - adds silence in the end - 'both' - adds silence in both the beginning and the end. - :type sides: basestring + :param sides: Position for padding + 'beginning' - adds silence in the beginning + 'end' - adds silence in the end + 'both' - adds silence in both the beginning and the end. + :type sides: str + :raises ValueError: If the sides not surport """ if duration == 0.0: return self - cls = type(self) - silence = cls.make_silence(duration, self._sample_rate) + silence = self.make_silence(duration, self._sample_rate) if sides == "beginning": - padded = cls.concatenate(silence, self) + padded = self.concatenate(silence, self) elif sides == "end": - padded = cls.concatenate(self, silence) + padded = self.concatenate(self, silence) elif sides == "both": - padded = cls.concatenate(silence, self, silence) + padded = self.concatenate(silence, self, silence) else: - raise ValueError("Unknown value for the kwarg 'sides'") + raise ValueError("Unknown value for the kwarg %s" % sides) self._samples = padded._samples self._sample_rate = padded._sample_rate @@ -398,88 +371,83 @@ class AudioSegment(object): """Return new AudioSegment containing audio between given boundaries. :param start_sec: Beginning of subsegment in seconds, - (beginning of segment if None). - :type start_sec: scalar + (beginning of segment if None). + :type start_sec: float :param end_sec: End of subsegment in seconds, - (end of segment if None). - :type end_sec: scalar - - :return: New AudioSegment containing specified - subsegment. - :trype: AudioSegment + (end of segment if None). + :type end_sec: float + :return: New AudioSegment containing specified subsegment. + :rtype: AudioSegment """ - # Default boundaries - if start_sec is None: - start_sec = 0.0 - if end_sec is None: - end_sec = self.duration - + start_sec = 0.0 if start_sec is None else start_sec + end_sec = self.duration if end_sec is None else end_sec # negative boundaries are relative to end of segment if start_sec < 0.0: start_sec = self.duration + start_sec if end_sec < 0.0: end_sec = self.duration + end_sec - start_sample = int(round(start_sec * self._sample_rate)) end_sample = int(round(end_sec * self._sample_rate)) samples = self._samples[start_sample:end_sample] - return type(self)(samples, sample_rate=self._sample_rate) def random_subsegment(self, subsegment_length, rng=None): - """ - Return a random subsegment of a specified length in seconds. + """Return a random subsegment of a specified length in seconds. :param subsegment_length: Subsegment length in seconds. - :type subsegment_length: scalar + :type subsegment_length: float :param rng: Random number generator state - :type rng: random.Random [optional] - - - :return:clip (SpeechDLSegment): New SpeechDLSegmen containing random - subsegment of original segment. + :type rng: random.Random + :return: New AudioSegment containing random subsegment + of original segment + :rtype: AudioSegment + :raises ValueError: If the length of subsegment greater than origineal + segemnt. 
""" - if rng is None: - rng = random.Random() - + rng = random.Random() if rng is None else rng if subsegment_length > self.duration: raise ValueError("Length of subsegment must not be greater " "than original segment.") start_time = rng.uniform(0.0, self.duration - subsegment_length) return self.subsegment(start_time, start_time + subsegment_length) - def convolve(self, ir, allow_resampling=False): + def convolve(self, impulse_segment, allow_resample=False): """Convolve this audio segment with the given filter. - :param ir: impulse response - :type ir: AudioSegment - :param allow_resampling: indicates whether resampling is allowed - when the ir has a different sample rate from this signal. - :type allow_resampling: boolean - """ - if allow_resampling and self.sample_rate != ir.sample_rate: - ir = ir.resample(self.sample_rate) - - if self.sample_rate != ir.sample_rate: - raise ValueError("Impulse response sample rate ({}Hz) is " - "equal to base signal sample rate ({}Hz)." - .format(ir.sample_rate, self.sample_rate)) + Note that this is an in-place transformation. - samples = signal.fftconvolve(self.samples, ir.samples, "full") + :param impulse_segment: Impulse response segments. + :type impulse_segment: AudioSegment + :param allow_resample: indicates whether resampling is allowed when + the impulse_segment has a different sample + rate from this signal. + :type allow_resample: boolean + :raises ValueError: If the sample rate is not match between two + audio segments and resample is not allowed. + """ + if allow_resample and self.sample_rate != impulse_segment.sample_rate: + impulse_segment = impulse_segment.resample(self.sample_rate) + if self.sample_rate != impulse_segment.sample_rate: + raise ValueError("Impulse segment's sample rate (%d Hz) is not" + "equal to base signal sample rate (%d Hz)." % + (impulse_segment.sample_rate, self.sample_rate)) + samples = signal.fftconvolve(self.samples, impulse_segment.samples, + "full") self._samples = samples - def convolve_and_normalize(self, ir, allow_resample=False): + def convolve_and_normalize(self, impulse_segment, allow_resample=False): """Convolve and normalize the resulting audio segment so that it has the same average power as the input signal. - :param ir: impulse response - :type ir: AudioSegment - :param allow_resampling: indicates whether resampling is allowed - when the ir has a different sample rate from this signal. - :type allow_resampling: boolean + :param impulse_segment: Impulse response segments. + :type impulse_segment: AudioSegment + :param allow_resample: indicates whether resampling is allowed when + the impulse_segment has a different sample rate from this signal. + :type allow_resample: boolean """ - self.convolve(ir, allow_resampling=allow_resampling) - self.normalize(target_db=self.rms_db) + target_db = self.rms_db + self.convolve(impulse_segment, allow_resample=allow_resample) + self.normalize(target_db) def add_noise(self, noise, @@ -492,36 +460,33 @@ class AudioSegment(object): of matching length is sampled from it and used instead. :param noise: Noise signal to add. - :type noise: SpeechDLSegment + :type noise: AudioSegment :param snr_dB: Signal-to-Noise Ratio, in decibels. - :type snr_dB: scalar - :param allow_downsampling: whether to allow the noise signal - to be downsampled to match the base signal sample rate. + :type snr_dB: float + :param allow_downsampling: whether to allow the noise signal to be downsampled + to match the base signal sample rate. 
:type allow_downsampling: boolean - :param max_gain_db: Maximum amount of gain to apply to noise - signal before adding it in. This is to prevent attempting - to apply infinite gain to a zero signal. - :type max_gain_db: scalar + :param max_gain_db: Maximum amount of gain to apply to noise signal before + adding it in. This is to prevent attempting to apply infinite + gain to a zero signal. + :type max_gain_db: float :param rng: Random number generator state. :type rng: random.Random - - Returns: - SpeechDLSegment: signal with noise added. + :raises ValueError: If the sample rate does not match between the two audio segments + and resample is not allowed, or if the duration of noise segments + is shorter than original audio segments. """ - if rng is None: - rng = random.Random() - + rng = random.Random() if rng is None else rng if allow_downsampling and noise.sample_rate > self.sample_rate: noise = noise.resample(self.sample_rate) - if noise.sample_rate != self.sample_rate: - raise ValueError("Noise sample rate ({}Hz) is not equal to " - "base signal sample rate ({}Hz)." - .format(noise.sample_rate, self.sample_rate)) + raise ValueError("Noise sample rate (%d Hz) is not equal to " + "base signal sample rate (%d Hz)." % + (noise.sample_rate, self.sample_rate)) if noise.duration < self.duration: - raise ValueError("Noise signal ({} sec) must be at " - "least as long as base signal ({} sec)." - .format(noise.duration, self.duration)) + raise ValueError("Noise signal (%f sec) must be at " + "least as long as base signal (%f sec)." % + (noise.duration, self.duration)) noise_gain_db = self.rms_db - noise.rms_db - snr_dB noise_gain_db = min(max_gain_db, noise_gain_db) noise_subsegment = noise.random_subsegment(self.duration, rng=rng) @@ -529,6 +494,12 @@ class AudioSegment(object): self._samples = output._samples self._sample_rate = output._sample_rate + def tranform_noise(self, noise_subsegment, noise_gain_db): + """ tranform noise file + """ + return type(self)(noise_subsegment._samples * (10.**( + noise_gain_db / 20.)), noise_subsegment._sample_rate) + @property def samples(self): """Return audio samples. @@ -618,9 +589,3 @@ class AudioSegment(object): else: raise TypeError("Unsupported sample type: %s." % samples.dtype) return output_samples.astype(dtype) - - def tranform_noise(self, noise_subsegment, noise_gain_db): - """ tranform noise file - """ - return type(self)(noise_subsegment._samples * (10.**( - noise_gain_db / 20.)), noise_subsegment._sample_rate) diff --git a/data_utils/augmentor/audio_database.py b/data_utils/augmentor/audio_database.py deleted file mode 100755 index e41c6dd72..000000000 --- a/data_utils/augmentor/audio_database.py +++ /dev/null @@ -1,401 +0,0 @@ -from __future__ import print_function -from collections import defaultdict -import bisect -import logging -import numpy as np -import os -import random -import sys - -UNK_TAG = "" - - -def stream_audio_index(fname, UNK=UNK_TAG): - """Reads an audio index file and emits one record in the index at a time. - - :param fname: audio index path - :type fname: basestring - :param UNK: UNK token to denote that certain audios are not tagged. - :type UNK: basesring - - Yields: - idx, duration, size, relpath, tags (int, float, int, str, list(str)): - audio file id, length of the audio in seconds, size in byte, - relative path w.r.t. 
to the root noise directory, list of tags - """ - with open(fname) as audio_index_file: - for i, line in enumerate(audio_index_file): - tok = line.strip().split("\t") - assert len(tok) >= 4, \ - "Invalid line at line {} in file {}".format( - i + 1, audio_index_file) - idx = int(tok[0]) - duration = float(tok[1]) - # Sometimes, the duration can round down to 0.0 - assert duration >= 0.0, \ - "Invalid duration at line {} in file {}".format( - i + 1, audio_index_file) - size = int(tok[2]) - assert size > 0, \ - "Invalid size at line {} in file {}".format( - i + 1, audio_index_file) - relpath = tok[3] - if len(tok) == 4: - tags = [UNK_TAG] - else: - tags = tok[4:] - yield idx, duration, size, relpath, tags - - -def truncate_float(val, ndigits=6): - """ Truncates a floating-point value to have the desired number of - digits after the decimal point. - - :param val: input value. - :type val: float - :parma ndigits: desired number of digits. - :type ndigits: int - - :return: truncated value - :rtype: float - """ - p = 10.0**ndigits - return float(int(val * p)) / p - - -def print_audio_index(idx, duration, size, relpath, tags, file=sys.stdout): - """Prints an audio record to the index file. - - :param idx: Audio file id. - :type idx: int - :param duration: length of the audio in seconds - :type duration: float - :param size: size of the file in bytes - :type size: int - :param relpath: relative path w.r.t. to the root noise directory. - :type relpath: basestring - :parma tags: list of tags - :parma tags: list(str) - :parma file: file to which we want to write an audio record. - :type file: sys.stdout - """ - file.write("{}\t{:.6f}\t{}\t{}" - .format(idx, truncate_float(duration, ndigits=6), size, relpath)) - for tag in tags: - file.write("\t{}".format(tag)) - file.write("\n") - - -class AudioIndex(object): - """ In-memory index of audio files that do not have annotations. - This supports duration-based sampling and sampling from a target - distribution. - - Each line in the index file consists of the following fields: - (id (int), duration (float), size (int), relative path (str), - list of tags ([str])) - """ - - def __init__(self): - self.audio_dir = None - self.index_fname = None - self.tags = None - self.bin_size = 2.0 - self.clear() - - def clear(self): - """ Clears the index - - Returns: - None - """ - self.idx_to_record = {} - # The list of indices correspond to audio files whose duration is - # greater than or equal to the key. - self.duration_to_id_set = {} - self.duration_to_id_set_per_tag = defaultdict(lambda: {}) - self.duration_to_list = defaultdict(lambda: []) - self.duration_to_list_per_tag = defaultdict( - lambda: defaultdict(lambda: [])) - self.tag_to_id_set = defaultdict(lambda: set()) - self.shared_duration_bins = [] - self.id_set_complete = set() - self.id_set = set() - self.duration_bins = [] - - def has_audio(self, distr=None): - """ - :param distr: The target distribution of audio tags that we want to - match. If this is not supplied, the function simply checks that - there are some audio files. - :parma distr: dict - :return: True if there are audio files. - :rtype: boolean - """ - if distr is None: - return len(self.id_set) > 0 - else: - for tag in distr: - if tag not in self.duration_to_list_per_tag: - return False - return True - - def _load_all_records_from_disk(self, audio_dir, idx_fname, bin_size): - """Loads all audio records from the disk into memory and groups them - into chunks based on their duration and the bin_size granalarity. 
- - Once all the records are read, indices are built from these records - by another function so that the audio samples can be drawn efficiently. - - Updates: - self.audio_dir (path): audio root directory - self.idx_fname (path): audio database index filename - self.bin_size (float): granularity of bins - self.idx_to_record (dict): maps from the audio id to - (duration, file_size, relative_path, tags) - self.tag_to_id_set (dict): maps from the tag to - the set of id's of audios that have this tag. - self.id_set_complete (set): set of all audio id's in the index file - self.min_duration (float): minimum audio duration observed in the - index file - self.duration_bins (list): the lower bounds on the duration of - audio files falling in each bin - self.duration_to_id_set (dict): contains (k, v) where v is the set - of id's of audios whose lengths are longer than or equal to k. - (e.g. k is the duration lower bound of this bin). - self.duration_to_id_set_per_tag (dict): Something like above but - has a finer granularity mapping from the tag to - duration_to_id_set. - self.shared_duration_bins (list): list of sets where each set - contains duration lower bounds whose audio id sets are the - same. The rationale for having this is that there are a few - but extremely long audio files which lead to a lot of bins. - When the id sets do not change across various minimum duration - boundaries, we - cluster these together and make them point to the same id set - reference. - - :return: whether the records were read from the disk. The assumption is - that the audio index file on disk and the actual audio files - are constructed once and never change during training. We only - re-read when either the directory or the index file path change. - """ - if self.audio_dir == audio_dir and self.idx_fname == idx_fname and \ - self.bin_size == bin_size: - # The audio directory and/or the list of audio files - # haven't changed. No need to load the list again. - return False - - # Remember where the audio index is most recently read from. - self.audio_dir = audio_dir - self.idx_fname = idx_fname - self.bin_size = bin_size - - # Read in the idx and compute the number of bins necessary - self.clear() - rank = [] - min_duration = float('inf') - max_duration = float('-inf') - for idx, duration, file_size, relpath, tags in \ - stream_audio_index(idx_fname): - self.idx_to_record[idx] = (duration, file_size, relpath, tags) - max_duration = max(max_duration, duration) - min_duration = min(min_duration, duration) - rank.append((duration, idx)) - for tag in tags: - self.tag_to_id_set[tag].add(idx) - if len(rank) == 0: - # file is empty - raise IOError("Index file {} is empty".format(idx_fname)) - for tag in self.tag_to_id_set: - self.id_set_complete |= self.tag_to_id_set[tag] - dur = min_duration - self.min_duration = min_duration - while dur < max_duration + bin_size: - self.duration_bins.append(dur) - dur += bin_size - - # Sort in decreasing order of duration and populate - # the cumulative indices lists. - rank.sort(reverse=True) - - # These are indices for `rank` and used to keep track of whether - # there are new records to add in the current bin. - last = 0 - cur = 0 - - # The set of audios falling in the previous bin; in the case, - # where we don't find new audios for the current bin, we store - # the reference to the last set so as to conserve memory. - # This is not such a big problem if the audio duration is - # bounded by a small number like 30 seconds and the - # bin size is big enough. 
But, for raw freesound audios, - # some audios can be as long as a few hours! - last_audio_set = set() - - # The same but for each tag so that we can pick audios based on - # tags and also some user-specified tag distribution. - last_audio_set_per_tag = defaultdict(lambda: set()) - - # Set of lists of bins sharing the same audio sets. - shared = set() - - for i in range(len(self.duration_bins) - 1, -1, -1): - lower_bound = self.duration_bins[i] - new_audio_idxs = set() - new_audio_idxs_per_tag = defaultdict(lambda: set()) - while cur < len(rank) and rank[cur][0] >= lower_bound: - idx = rank[cur][1] - tags = self.idx_to_record[idx][3] - new_audio_idxs.add(idx) - for tag in tags: - new_audio_idxs_per_tag[tag].add(idx) - cur += 1 - # This makes certain that the same list is shared across - # different bins if no new indices are added. - if cur == last: - shared.add(lower_bound) - else: - last_audio_set = last_audio_set | new_audio_idxs - for tag in new_audio_idxs_per_tag: - last_audio_set_per_tag[tag] = \ - last_audio_set_per_tag[tag] | \ - new_audio_idxs_per_tag[tag] - if len(shared) > 0: - self.shared_duration_bins.append(shared) - shared = set([lower_bound]) - ### last_audio_set = set() should set blank - last = cur - self.duration_to_id_set[lower_bound] = last_audio_set - for tag in last_audio_set_per_tag: - self.duration_to_id_set_per_tag[lower_bound][tag] = \ - last_audio_set_per_tag[tag] - - # The last `shared` record isn't added to the `shared_duration_bins`. - self.shared_duration_bins.append(shared) - - # We make sure that the while loop above has exhausted through the - # `rank` list by checking if the `cur`rent index in `rank` equals - # the length of the array, which is the halting condition. - assert cur == len(rank) - - return True - - def _build_index_from_records(self, tag_list): - """ Uses the in-memory records read from the index file to build - an in-memory index restricted to the given tag list. - - :param tag_list: List of tags we are interested in sampling from. - :type tag_list: list(str) - - Updates: - self.id_set (set): the set of all audio id's that can be sampled. - self.duration_to_list (dict): maps from the duration lower bound - to the id's of audios longer than this duration. - self.duration_to_list_per_tag (dict): maps from the tag to - the same structure as self.duration_to_list. This is to support - sampling from a target noise distribution. - - :return: whether the index was built from scratch - """ - if self.tags == tag_list: - return False - - self.tags = tag_list - if len(tag_list) == 0: - self.id_set = self.id_set_complete - else: - self.id_set = set() - for tag in tag_list: - self.id_set |= self.tag_to_id_set[tag] - - # Next, we need to take a subset of the audio files - for shared in self.shared_duration_bins: - # All bins in `shared' have the same index lists - # so we can intersect once and set all of them to this list. 
- lb = list(shared)[0] - intersected = list(self.id_set & self.duration_to_id_set[lb]) - duration_to_id_set = self.duration_to_id_set_per_tag[lb] - intersected_per_tag = { - tag: self.tag_to_id_set[tag] & duration_to_id_set[tag] - for tag in duration_to_id_set - } - for bin_key in shared: - self.duration_to_list[bin_key] = intersected - for tag in intersected_per_tag: - self.duration_to_list_per_tag[tag][bin_key] = \ - intersected_per_tag[tag] - assert len(self.duration_to_list) == len(self.duration_to_id_set) - return True - - def refresh_records_from_index_file(self, - audio_dir, - idx_fname, - tag_list, - bin_size=2.0): - """ Loads the index file and populates the records - for building the internal index. - - If the audio directory or index file name has changed, the whole index - is reloaded from scratch. If only the tag_list is changed, then the - desired index is built from the complete, in-memory record. - - :param audio_dir: audio directory - :type audio_dir: basestring - :param idx_fname: audio index file name - :type idex_fname: basestring - :param tag_list: list of tags we are interested in loading; - if empty, we load all. - :type tag_list: list - :param bin_size: optional argument for controlling the granularity - of duration bins - :type bin_size: float - """ - if tag_list is None: - tag_list = [] - reloaded_records = self._load_all_records_from_disk(audio_dir, - idx_fname, bin_size) - if reloaded_records or self.tags != tag_list: - self._build_index_from_records(tag_list) - logger.info('loaded {} audio files from {}' - .format(len(self.id_set), idx_fname)) - - def sample_audio(self, duration, rng=None, distr=None): - """ Uniformly draws an audio record of at least the desired duration - - :param duration: minimum desired audio duration - :type duration: float - :param rng: random number generator - :type rng: random.Random - :param distr: target distribution of audio tags. If not provided, - :type distr: dict - all audio files are sampled uniformly at random. - - :returns: success, (duration, file_size, path) - """ - if duration < 0.0: - duration = self.min_duration - i = bisect.bisect_left(self.duration_bins, duration) - if i == len(self.duration_bins): - return False, None - bin_key = self.duration_bins[i] - if distr is None: - indices = self.duration_to_list[bin_key] - else: - # If a desired audio distribution is given, we sample from it. 
- if rng is None: - rng = random.Random() - nprng = np.random.RandomState(rng.getrandbits(32)) - prob_masses = distr.values() - prob_masses /= np.sum(prob_masses) - tag = nprng.choice(distr.keys(), p=prob_masses) - indices = self.duration_to_list_per_tag[tag][bin_key] - if len(indices) == 0: - return False, None - else: - if rng is None: - rng = random.Random() - # duration, file size and relative path from root - s = self.idx_to_record[rng.sample(indices, 1)[0]] - s = (s[0], s[1], os.path.join(self.audio_dir, s[2])) - return True, s diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index c0a70ad18..abe1a0ec8 100755 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -6,11 +6,6 @@ from __future__ import print_function import json import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor -from data_utils.augmentor.resamler import ResamplerAugmentor -from data_utils.augmentor.speed_perturb import SpeedPerturbatioAugmentor -from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor -from data_utils.augmentor.Impulse_response import ImpulseResponseAugmentor -from data_utils.augmentor.noise_speech import NoiseSpeechAugmentor class AugmentationPipeline(object): @@ -81,15 +76,5 @@ class AugmentationPipeline(object): """Return an augmentation model by the type name, and pass in params.""" if augmentor_type == "volume": return VolumePerturbAugmentor(self._rng, **params) - if augmentor_type == "resamle": - return ResamplerAugmentor(self._rng, **params) - if augmentor_type == "speed": - return SpeedPerturbatioAugmentor(self._rng, **params) - if augmentor_type == "online_bayesian_normalization": - return OnlineBayesianNormalizationAugmentor(self._rng, **params) - if augmentor_type == "Impulse_response": - return ImpulseResponseAugmentor(self._rng, **params) - if augmentor_type == "noise_speech": - return NoiseSpeechAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/data_utils/augmentor/implus_response.py b/data_utils/augmentor/implus_response.py deleted file mode 100755 index cc2053421..000000000 --- a/data_utils/augmentor/implus_response.py +++ /dev/null @@ -1,76 +0,0 @@ -""" Impulse response""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base -from . import audio_database -from data_utils.speech import SpeechSegment - - -class ImpulseResponseAugmentor(base.AugmentorBase): - """ Instantiates an impulse response model - - :param ir_dir: directory containing impulse responses - :type ir_dir: basestring - :param tags: optional parameter for specifying what - particular impulse responses to apply. - :type tags: list - :parm tag_distr: optional noise distribution - :type tag_distr: dict - """ - - def __init__(self, rng, ir_dir, index_file, tags=None, tag_distr=None): - # Define all required parameter maps here. - self.ir_dir = ir_dir - self.index_file = index_file - - self.tags = tags - self.tag_distr = tag_distr - - self.audio_index = audio_database.AudioIndex() - self.rng = rng - - def _init_data(self): - """ Preloads stuff from disk in an attempt (e.g. list of files, etc) - to make later loading faster. If the data configuration remains the - same, this function does nothing. 
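The impulse-response augmentor applies reverberation by convolving the utterance with a sampled room response (see transform_audio below). A rough standalone sketch of that operation follows; the scipy call and the level matching are illustrative assumptions, not necessarily how AudioSegment.convolve is implemented.

```
import numpy as np
from scipy import signal


def convolve_with_ir(samples, ir, keep_level_db=None):
    """Convolve a 1-D signal with an impulse response and trim to length."""
    wet = signal.fftconvolve(samples, ir, mode="full")[:len(samples)]
    if keep_level_db is not None:
        # Rescale so the output RMS matches the requested level in dB.
        rms_db = 10.0 * np.log10(np.mean(wet ** 2))
        wet *= 10.0 ** ((keep_level_db - rms_db) / 20.0)
    return wet


dry = np.random.randn(16000)             # 1 s of noise at 16 kHz
ir = np.exp(-np.linspace(0, 8, 4000))    # toy exponentially decaying "room"
wet = convolve_with_ir(dry, ir, keep_level_db=-20.0)
```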
- - """ - self.audio_index.refresh_records_from_index_file( - self.ir_dir, self.index_file, self.tags) - - def transform_audio(self, audio_segment): - """ Convolves the input audio with an impulse response. - - :param audio_segment: input audio - :type audio_segment: AudioSegemnt - """ - # This handles the cases where the data source or directories change. - self._init_data() - - read_size = 0 - tag_distr = self.tag_distr - if not self.audio_index.has_audio(tag_distr): - if tag_distr is None: - if not self.tags: - raise RuntimeError("The ir index does not have audio " - "files to sample from.") - else: - raise RuntimeError("The ir index does not have audio " - "files of the given tags to sample " - "from.") - else: - raise RuntimeError("The ir index does not have audio " - "files to match the target ir " - "distribution.") - else: - # Querying with a negative duration triggers the index to search - # from all impulse responses. - success, record = self.audio_index.sample_audio( - -1.0, rng=self.rng, distr=tag_distr) - if success is True: - _, read_size, ir_fname = record - ir_wav = SpeechSegment.from_file(ir_fname) - audio_segment.convolve(ir_wav, allow_resampling=True) diff --git a/data_utils/augmentor/noise_speech.py b/data_utils/augmentor/noise_speech.py deleted file mode 100755 index 8cf7c27b6..000000000 --- a/data_utils/augmentor/noise_speech.py +++ /dev/null @@ -1,318 +0,0 @@ -""" noise speech -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import logging -import numpy as np -import os -from collections import defaultdict - -from . import base -from . import audio_database -from data_utils.speech import SpeechSegment - -TURK = "turk" -USE_AUDIO_DATABASE_SOURCES = frozenset(["freesound", "chime"]) -HALF_NOISE_LENGTH_MIN_THRESHOLD = 3.0 -FIND_NOISE_MAX_ATTEMPTS = 20 - -logger = logging.getLogger(__name__) - - -def get_first_smaller(items, value): - index = bisect.bisect_left(items, value) - 1 - assert items[index] < value, \ - 'get_first_smaller failed! %d %d' % (items[index], value) - return items[index] - - -def get_first_larger(items, value): - 'Find leftmost value greater than value' - index = bisect.bisect_right(items, value) - assert index < len(items), \ - "no noise bin exists for this audio length (%f)" % value - assert items[index] > value, \ - 'get_first_larger failed! %d %d' % (items[index], value) - return items[index] - - -def _get_turk_noise_files(noise_dir, index_file): - """ Creates a map from duration => a list of noise filenames - - :param noise_dir: Directory of noise files which contains - "noise-samples-list" - :type noise_dir: basestring - :param index_file: Noise list - :type index_file: basestring - - returns:noise_files (defaultdict): A map of bins to noise files. - Each key is the duration, and the value is a list of noise - files binned to this duration. Each bin is 2 secs. 
- - Note: noise-samples-list should contain one line per noise (wav) file - along with its duration in milliseconds - """ - noise_files = defaultdict(list) - if not os.path.exists(index_file): - logger.error('No noise files were found at {}'.format(index_file)) - return noise_files - num_noise_files = 0 - rounded_durations = list(range(0, 65, 2)) - with open(index_file, 'r') as fl: - for line in fl: - fname = os.path.join(noise_dir, line.strip().split()[0]) - duration = float(line.strip().split()[1]) / 1000 - # bin the noise files into length bins rounded by 2 sec - bin_id = get_first_smaller(rounded_durations, duration) - noise_files[bin_id].append(fname) - num_noise_files += 1 - logger.info('Loaded {} turk noise files'.format(num_noise_files)) - return noise_files - - -class NoiseSpeechAugmentor(base.AugmentorBase): - """ Noise addition block - - :param snr_min: minimum signal-to-noise ratio - :type snr_min: float - :param snr_max: maximum signal-to-noise ratio - :type snr_max: float - :param noise_dir: root of where noise files are stored - :type noise_fir: basestring - :param index_file: index of noises of interest in noise_dir - :type index_file: basestring - :param source: select one from - - turk - - freesound - - chime - Note that this field is no longer required for the freesound - and chime - :type source: string - :param tags: optional parameter for specifying what - particular noises we want to add. See above for the available tags. - :type tags: list - :param tag_distr: optional noise distribution - :type tag_distr: dict - """ - - def __init__(self, - rng, - snr_min, - snr_max, - noise_dir, - source, - allow_downsampling=None, - index_file=None, - tags=None, - tag_distr=None): - # Define all required parameter maps here. - self.rng = rng - self.snr_min = snr_min - self.snr_max = snr_max - self.noise_dir = noise_dir - self.source = source - - self.allow_downsampling = allow_downsampling - self.index_file = index_file - self.tags = tags - self.tag_distr = tag_distr - - # When new noise sources are added, make sure to define the - # associated bookkeeping variables here. - self.turk_noise_files = [] - self.turk_noise_dir = None - self.audio_index = audio_database.AudioIndex() - - def _init_data(self): - """ Preloads stuff from disk in an attempt (e.g. list of files, etc) - to make later loading faster. If the data configuration remains the - same, this function does nothing. 
- - """ - noise_dir = self.noise_dir - index_file = self.index_file - source = self.source - if not index_file: - if source == TURK: - index_file = os.path.join(noise_dir, 'noise-samples-list') - logger.debug("index_file not provided; " + "defaulting to " + - index_file) - else: - if source != "": - assert source in USE_AUDIO_DATABASE_SOURCES, \ - "{} not supported by audio_database".format(source) - index_file = os.path.join(noise_dir, - "audio_index_commercial.txt") - logger.debug("index_file not provided; " + "defaulting to " + - index_file) - - if source == TURK: - if self.turk_noise_dir != noise_dir: - self.turk_noise_dir = noise_dir - self.turk_noise_files = _get_turk_noise_files(noise_dir, - index_file) - # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: - else: - if source != "": - assert source in USE_AUDIO_DATABASE_SOURCES, \ - "{} not supported by audio_database".format(source) - self.audio_index.refresh_records_from_index_file( - self.noise_dir, index_file, self.tags) - - def transform_audio(self, audio_segment): - """Adds walla noise - - :param audio_segment: Input audio - :type audio_segment: SpeechSegment - """ - # This handles the cases where the data source or directories change. - self._init_data - source = self.source - allow_downsampling = self.allow_downsampling - if source == TURK: - self._add_turk_noise(audio_segment, self.rng, allow_downsampling) - # elif source == TODO_SUPPORT_NON_AUDIO_DATABASE_BASED_SOURCES: - else: - self._add_noise(audio_segment, self.rng, allow_downsampling) - - def _sample_snr(self): - """ Returns a float sampled in [`self.snr_min`, `self.snr_max`] - if both `self.snr_min` and `self.snr_max` are non-zero. - """ - snr_min = self.snr_min - snr_max = self.snr_max - sampled_snr = self.rng.uniform(snr_min, snr_max) - return sampled_snr - - def _add_turk_noise(self, audio_segment, allow_downsampling): - """ Adds a turk noise to the input audio. - - :param audio_segment: input audio - :type audio_segment: audiosegment - :param allow_downsampling: indicates whether downsampling - is allowed - :type allow_downsampling: boolean - """ - read_size = 0 - if len(self.turk_noise_files) > 0: - snr = self._sample_snr(self.rng) - # Draw the noise file randomly from noise files that are - # slightly longer than the utterance - noise_bins = sorted(self.turk_noise_files.keys()) - # note some bins can be empty, so we can't just round up - # to the nearest 2-sec interval - rounded_duration = get_first_larger(noise_bins, - audio_segment.duration) - noise_fname = \ - self.rng.sample(self.turk_noise_files[rounded_duration], 1)[0] - noise = SpeechSegment.from_wav_file(noise_fname) - logger.debug('noise_fname {}'.format(noise_fname)) - logger.debug('snr {}'.format(snr)) - read_size = len(noise) * 2 - # May throw exceptions, but this is caught by - # AudioFeaturizer.get_audio_files. - audio_segment.add_noise( - noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) - - def _add_noise(self, audio_segment, allow_downsampling): - """ Adds a noise indexed in audio_database.AudioIndex. 
- - :param audio_segment: input audio - :type audio_segment: SpeechSegment - :param allow_downsampling: indicates whether downsampling - is allowed - :type allow_downsampling: boolean - - Returns: - (SpeechSegment, int) - - sound with turk noise added - - number of bytes read from disk - """ - read_size = 0 - tag_distr = self.tag_distr - if not self.audio_index.has_audio(tag_distr): - if tag_distr is None: - if not self.tags: - raise RuntimeError("The noise index does not have audio " - "files to sample from.") - else: - raise RuntimeError("The noise index does not have audio " - "files of the given tags to sample " - "from.") - else: - raise RuntimeError("The noise index does not have audio " - "files to match the target noise " - "distribution.") - else: - # Compute audio segment related statistics - audio_duration = audio_segment.duration - - # Sample relevant augmentation parameters. - snr = self._sample_snr(self.rng) - - # Perhaps, we may not have a sufficiently long noise, so we need - # to search iteratively. - min_duration = audio_duration + 0.25 - for _ in range(FIND_NOISE_MAX_ATTEMPTS): - logger.debug("attempting to find noise of length " - "at least {}".format(min_duration)) - - success, record = \ - self.audio_index.sample_audio(min_duration, - rng=self.rng, - distr=tag_distr) - - if success is True: - noise_duration, read_size, noise_fname = record - - # Assert after logging so we know - # what caused augmentation to fail. - logger.debug("noise_fname {}".format(noise_fname)) - logger.debug("snr {}".format(snr)) - assert noise_duration >= min_duration - break - - # Decrease the desired minimum duration linearly. - # If the value becomes smaller than some threshold, - # we half the value instead. - if min_duration > HALF_NOISE_LENGTH_MIN_THRESHOLD: - min_duration -= 2.0 - else: - min_duration *= 0.5 - - if success is False: - logger.info("Failed to find a noise file") - return - - diff_duration = audio_duration + 0.25 - noise_duration - if diff_duration >= 0.0: - # Here, the noise is shorter than the audio file, so - # we pad with zeros to make sure the noise sound is applied - # with a uniformly random shift. - noise = SpeechSegment.from_file(noise_fname) - noise = noise.pad_silence(diff_duration, sides="both") - else: - # The noise clip is at least ~25 ms longer than the audio - # segment here. - diff_duration = int(noise_duration * audio_segment.sample_rate) - \ - int(audio_duration * audio_segment.sample_rate) - \ - int(0.02 * audio_segment.sample_rate) - start = float(self.rng.randint(0, diff_duration)) / \ - audio.sample_rate - finish = min(start + audio_duration + 0.2, noise_duration) - noise = SpeechSegment.slice_from_file(noise_fname, start, - finish) - - if len(noise) < len(audio_segment): - # This is to ensure that the noise clip is at least as - # long as the audio segment. - num_samples_to_pad = len(audio_segment) - len(noise) - # Padding this amount of silence on both ends ensures that - # the placement of the noise clip is uniformly random. 
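The mixing itself is delegated to add_noise and boils down to a small piece of dB arithmetic: scale the noise by the gap between the signal RMS, the noise RMS and the desired SNR. A standalone sketch of that arithmetic:

```
import numpy as np


def rms_db(x):
    return 10.0 * np.log10(np.mean(x ** 2))


def mix_at_snr(speech, noise, snr_db, max_gain_db=300.0):
    """Return speech plus noise scaled so the speech-to-noise ratio ~= snr_db."""
    gain_db = min(rms_db(speech) - rms_db(noise) - snr_db, max_gain_db)
    return speech + noise[:len(speech)] * (10.0 ** (gain_db / 20.0))


rng = np.random.RandomState(0)
speech = rng.randn(8000) * 0.1
noise = rng.randn(8000)
noisy = mix_at_snr(speech, noise, snr_db=10.0)
print(rms_db(speech) - rms_db(noisy - speech))   # ~10 dB by construction
```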
- silence = SpeechSegment( - np.zeros(num_samples_to_pad), audio_segment.sample_rate) - noise = SpeechSegment.concatenate(silence, noise, silence) - - audio_segment.add_noise( - noise, snr, rng=self.rng, allow_downsampling=allow_downsampling) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py deleted file mode 100755 index bc2d6c1b6..000000000 --- a/data_utils/augmentor/online_bayesian_normalization.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Online bayesian normalization -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base - - -class OnlineBayesianNormalizationAugmentor(base.AugmentorBase): - """ - Instantiates an online bayesian normalization module. - :param target_db: Target RMS value in decibels - :type target_db: func[int->scalar] - :param prior_db: Prior RMS estimate in decibels - :type prior_db: func[int->scalar] - :param prior_samples: Prior strength in number of samples - :type prior_samples: func[int->scalar] - :param startup_delay: Start-up delay in seconds during - which normalization statistics is accrued. - :type starup_delay: func[int->scalar] - """ - - def __init__(self, - rng, - target_db, - prior_db, - prior_samples, - startup_delay=base.parse_parameter_from(0.0)): - - self.target_db = target_db - self.prior_db = prior_db - self.prior_samples = prior_samples - self.startup_delay = startup_delay - self.rng = rng - - def transform_audio(self, audio_segment): - """ - Normalizes the input audio using the online Bayesian approach. - - :param audio_segment: input audio - :type audio_segment: SpeechSegment - :param iteration: current iteration - :type iteration: int - :param text: audio transcription - :type text: basestring - :param rng: RNG to use for augmentation - :type rng: random.Random - - """ - read_size = 0 - target_db = self.target_db(iteration) - prior_db = self.prior_db(iteration) - prior_samples = self.prior_samples(iteration) - startup_delay = self.startup_delay(iteration) - audio.normalize_online_bayesian( - target_db, prior_db, prior_samples, startup_delay=startup_delay) diff --git a/data_utils/augmentor/resampler.py b/data_utils/augmentor/resampler.py deleted file mode 100755 index 1b959be56..000000000 --- a/data_utils/augmentor/resampler.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base - - -class ResamplerAugmentor(base.AugmentorBase): - """ Instantiates a resampler module. - - :param new_sample_rate: New sample rate in Hz - :type new_sample_rate: func[int->scalar] - :param rng: Random generator object. - :type rng: random.Random - """ - - def __init__(self, rng, new_sample_rate): - self.new_sample_rate = new_sample_rate - self._rng = rng - - def transform_audio(self, audio_segment): - """ Resamples the input audio to the target sample rate. - - Note that this is an in-place transformation. 
- - :param audio: input audio - :type audio: SpeechDLSegment - """ - new_sample_rate = self.new_sample_rate - audio.resample(new_sample_rate) \ No newline at end of file diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py deleted file mode 100755 index e09be5f74..000000000 --- a/data_utils/augmentor/speed_perturb.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Speed perturbation module for making ASR robust to different voice -types (high pitched, low pitched, etc) -Samples uniformly between speed_min and speed_max -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import base - - -class SpeedPerturbatioAugmentor(base.AugmentorBase): - """ - Instantiates a speed perturbation module. - - See reference paper here: - - http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf - - :param speed_min: Lower bound on new rate to sample - :type speed_min: func[int->scalar] - :param speed_max: Upper bound on new rate to sample - :type speed_max: func[int->scalar] - """ - - def __init__(self, rng, speed_min, speed_max): - - if (speed_min < 0.9): - raise ValueError( - "Sampling speed below 0.9 can cause unnatural effects") - if (speed_min > 1.1): - raise ValueError( - "Sampling speed above 1.1 can cause unnatural effects") - self.speed_min = speed_min - self.speed_max = speed_max - self.rng = rng - - def transform_audio(self, audio_segment): - """ - Samples a new speed rate from the given range and - changes the speed of the given audio clip. - - Note that this is an in-place transformation. - - :param audio_segment: input audio - :type audio_segment: SpeechDLSegment - """ - read_size = 0 - speed_min = self.speed_min(iteration) - speed_max = self.speed_max(iteration) - sampled_speed = rng.uniform(speed_min, speed_max) - audio = audio.change_speed(sampled_speed) diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py index 15055b915..a5a9f6cad 100755 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -3,10 +3,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from . import base +from data_utils.augmentor.base import AugmentorBase -class VolumePerturbAugmentor(base.AugmentorBase): +class VolumePerturbAugmentor(AugmentorBase): """Augmentation model for adding random volume perturbation. This is used for multi-loudness training of PCEN. See diff --git a/requirements.txt b/requirements.txt index 58a93debe..c37e88ffe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ SoundFile==0.9.0.post1 wget==3.2 +scikits.samplerate==0.3.3 +scipy==0.13.0b1 From d1ee10be102263da5fbfac1e131c31ed605b5ad0 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Fri, 16 Jun 2017 18:29:56 +0800 Subject: [PATCH 042/335] modify audio and speech --- data_utils/audio.py | 14 ++++++++------ data_utils/speech.py | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index ee4e6d84e..066437dc6 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -104,7 +104,8 @@ class AudioSegment(object): io.BytesIO(bytes), dtype='float32') return cls(samples, sample_rate) - def concatenate(self, *segments): + @classmethod + def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. 
:param *segments: Input audio segments @@ -123,11 +124,11 @@ class AudioSegment(object): if sample_rate != seg._sample_rate: raise ValueError("Can't concatenate segments with " "different sample rates") - if type(seg) is not type(self): + if type(seg) is not cls: raise TypeError("Only audio segments of the same type " "instance can be concatenated.") samples = np.concatenate([seg.samples for seg in segments]) - return type(self)(samples, sample_rate) + return cls(samples, sample_rate) def to_wav_file(self, filepath, dtype='float32'): """Save audio segment to disk as wav file. @@ -355,13 +356,14 @@ class AudioSegment(object): """ if duration == 0.0: return self + cls = type(self) silence = self.make_silence(duration, self._sample_rate) if sides == "beginning": - padded = self.concatenate(silence, self) + padded = cls.concatenate(silence, self) elif sides == "end": - padded = self.concatenate(self, silence) + padded = cls.concatenate(self, silence) elif sides == "both": - padded = self.concatenate(silence, self, silence) + padded = cls.concatenate(silence, self, silence) else: raise ValueError("Unknown value for the kwarg %s" % sides) self._samples = padded._samples diff --git a/data_utils/speech.py b/data_utils/speech.py index 48db595b4..5d1fc15a7 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -65,6 +65,32 @@ class SpeechSegment(AudioSegment): audio = AudioSegment.from_bytes(bytes) return cls(audio.samples, audio.sample_rate, transcript) + @classmethod + def concatenate(cls, *segments): + """Concatenate an arbitrary number of audio segments together. + + :param *segments: Input speech segments + :type *segments: SpeechSegment + :return: Speech segment instance. + :rtype: SpeechSegment + :raises ValueError: If number of segments is zero, or if sample_rate + not match between two audio segments + :raises TypeError: If item of segments is not Audiosegment instance + """ + # Perform basic sanity-checks. + if len(segments) == 0: + raise ValueError("No audio segments are given to concatenate.") + sample_rate = segments[0]._sample_rate + for seg in segments: + if sample_rate != seg._sample_rate: + raise ValueError("Can't concatenate segments with " + "different sample rates") + if type(seg) is not cls: + raise TypeError("Only speech segments of the same type " + "instance can be concatenated.") + samples = np.concatenate([seg.samples for seg in segments]) + return cls(samples, sample_rate, seg._transcript) + @property def transcript(self): """Return the transcript text. From 5ca270d30a34c71b0b851ed376fb7e7d90b3cf17 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sat, 17 Jun 2017 09:03:18 +0800 Subject: [PATCH 043/335] add audio file --- data_utils/audio.py | 245 ++++++++++++++++++++----------------------- data_utils/speech.py | 55 ++++++++-- 2 files changed, 161 insertions(+), 139 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 066437dc6..1f75da8ac 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -47,32 +47,6 @@ class AudioSegment(object): """Return whether two objects are unequal.""" return not self.__eq__(other) - def __len__(self): - """Returns length of segment in samples.""" - return self.num_samples - - def __add__(self, other): - """Add samples from another segment to those of this segment and return - a new segment (sample-wise addition, not segment concatenation). - - :param other: Segment containing samples to be - added in. - :type other: AudioSegment - :return: New segment containing resulting samples. 
- :rtype: AudioSegment - :raise TypeError: If sample rates of segments don't match, - or if length of segments don't match. - """ - if type(self) != type(other): - raise TypeError("Cannot add segment of different type: {}" - .format(type(other))) - if self._sample_rate != other._sample_rate: - raise TypeError("Sample rates must match to add segments.") - if len(self._samples) != len(other._samples): - raise TypeError("Segment lengths must match to add segments.") - samples = self.samples + other.samples - return type(self)(samples, sample_rate=self._sample_rate) - def __str__(self): """Return human-readable representation of segment.""" return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " @@ -108,13 +82,13 @@ class AudioSegment(object): def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. - :param *segments: Input audio segments + :param *segments: Input audio segments. :type *segments: AudioSegment - :return: Audio segment instance. + :return: Audio segment instance as concatenating results. :rtype: AudioSegment - :raises ValueError: If number of segments is zero, or if sample_rate - not match between two audio segments - :raises TypeError: If item of segments is not Audiosegment instance + :raises ValueError: If the number of segments is zero, or if the + sample_rate of any two segments does not match. + :raises TypeError: If every segment in is not Audiosegment instance. """ # Perform basic sanity-checks. if len(segments) == 0: @@ -155,12 +129,13 @@ class AudioSegment(object): format='WAV', subtype=subtype_map[dtype]) - def slice_from_file(self, file, start=None, end=None): + @classmethod + def slice_from_file(cls, file, start=None, end=None): """Loads a small section of an audio without having to load the entire file into the memory which can be incredibly wasteful. - :param file: Input audio filepath - :type file: basestring + :param file: Input audio filepath or file object. + :type file: basestring|file :param start: Start time in seconds. If start is negative, it wraps around from the end. If not provided, this function reads from the very beginning. @@ -169,9 +144,11 @@ class AudioSegment(object): from the end. If not provided, the default behvaior is to read to the end of the file. :type end: float - :return: The specified slice of input audio in the audio.AudioSegment format. + :return: AudioSegment instance of the specified slice of the input + audio file. :rtype: AudioSegment - :rainse ValueError: If the position is error, or if the time is out bounds. + :raise ValueError: If start or end is incorrectly set, e.g. out of + bounds in time. """ sndfile = soundfile.SoundFile(file) sample_rate = sndfile.samplerate @@ -184,40 +161,60 @@ class AudioSegment(object): end += duration if start < 0.0: raise ValueError("The slice start position (%f s) is out of " - "bounds. Filename: %s" % (start, file)) + "bounds." % start) if end < 0.0: - raise ValueError("The slice end position (%f s) is out of bounds " - "Filename: %s" % (end, file)) + raise ValueError("The slice end position (%f s) is out of bounds." % + end) if start > end: raise ValueError("The slice start position (%f s) is later than " "the slice end position (%f s)." 
% (start, end)) if end > duration: - raise ValueError("The slice end time (%f s) is out of bounds " - "(> %f s) Filename: %s" % (end, duration, file)) + raise ValueError("The slice end position (%f s) is out of bounds " + "(> %f s)" % (end, duration)) start_frame = int(start * sample_rate) end_frame = int(end * sample_rate) sndfile.seek(start_frame) data = sndfile.read(frames=end_frame - start_frame, dtype='float32') - return type(self)(data, sample_rate) + return cls(data, sample_rate) - def make_silence(self, duration, sample_rate): + @classmethod + def make_silence(cls, duration, sample_rate): """Creates a silent audio segment of the given duration and sample rate. - :param duration: Length of silence in seconds + :param duration: Length of silence in seconds. :type duration: float - :param sample_rate: Sample rate + :param sample_rate: Sample rate. :type sample_rate: float - :return: Silence of the given duration + :return: Silent AudioSegment instance of the given duration. :rtype: AudioSegment """ samples = np.zeros(int(duration * sample_rate)) - return type(self)(samples, sample_rate) + return cls(samples, sample_rate) + + def superimposed(self, other): + """Add samples from another segment to those of this segment + (sample-wise addition, not segment concatenation). + + :param other: Segment containing samples to be added in. + :type other: AudioSegments + :raise TypeError: If type of two segments don't match. + :raise ValueError: If the sample_rate of two segments not equal, or if + the length of segments don't match. + """ + if type(self) != type(other): + raise TypeError("Cannot add segments of different types: %s " + "and %s." % (type(self), type(other))) + if self._sample_rate != other._sample_rate: + raise ValueError("Sample rates must match to add segments.") + if len(self._samples) != len(other._samples): + raise ValueError("Segment lengths must match to add segments.") + self._samples += other._samples def to_bytes(self, dtype='float32'): """Create a byte string containing the audio content. - :param dtype: Data type for export samples. Options: 'int16', 'int32', + :param dtype: Data type for export samples. Options: 'int16','int32', 'float32', 'float64'. Default is 'float32'. :type dtype: str :return: Byte string containing audio content. @@ -258,16 +255,17 @@ class AudioSegment(object): self._samples = np.interp(new_indices, old_indices, self._samples) def normalize(self, target_db=-20, max_gain_db=300.0): - """Normalize audio to be desired RMS value in decibels. + """Normalize audio to be of the desired RMS value in decibels. Note that this is an in-place transformation. - :param target_db: Target RMS value in decibels. This value should - be less than 0.0 as 0.0 is full-scale audio. + :param target_db: Target RMS value in decibels. This value should be + less than 0.0 as 0.0 is full-scale audio. :type target_db: float :param max_gain_db: Max amount of gain in dB that can be applied for - normalization. This is to prevent nans when attempting - to normalize a signal consisting of all zeros. + normalization. This is to prevent nans when + attempting to normalize a signal consisting of + all zeros. :type max_gain_db: float :raises ValueError: If the required gain to normalize the segment to the target_db value exceeds max_gain_db. 
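As a quick numeric check of the gain that normalize computes below: scaling the samples by 10**(gain_db / 20) shifts the RMS level by gain_db decibels, assuming apply_gain performs exactly that scaling.

```
import numpy as np

x = np.random.randn(16000) * 0.05
rms_db = 10.0 * np.log10(np.mean(x ** 2))      # roughly -26 dB for this signal
target_db = -20.0
gain_db = target_db - rms_db                   # what normalize() computes
y = x * 10.0 ** (gain_db / 20.0)               # what apply_gain() amounts to
print(10.0 * np.log10(np.mean(y ** 2)))        # ~ -20.0
```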
@@ -275,9 +273,9 @@ class AudioSegment(object): gain = target_db - self.rms_db if gain > max_gain_db: raise ValueError( - "Unable to normalize segment to %f dB because it has an RMS " - "value of %f dB and the difference exceeds max_gain_db (%f dB)" - % (target_db, self.rms_db, max_gain_db)) + "Unable to normalize segment to %f dB because the " + "the probable gain have exceeds max_gain_db (%f dB)" % + (target_db, max_gain_db)) self.apply_gain(min(max_gain_db, target_db - self.rms_db)) def normalize_online_bayesian(self, @@ -285,30 +283,30 @@ class AudioSegment(object): prior_db, prior_samples, startup_delay=0.0): - """Normalize audio using a production-compatible online/causal algorithm. - This uses an exponential likelihood and gamma prior to make online estimates - of the RMS even when there are very few samples. + """Normalize audio using a production-compatible online/causal + algorithm. This uses an exponential likelihood and gamma prior to + make online estimates of the RMS even when there are very few samples. Note that this is an in-place transformation. - :param target_db: Target RMS value in decibels + :param target_db: Target RMS value in decibels. :type target_bd: float - :param prior_db: Prior RMS estimate in decibels + :param prior_db: Prior RMS estimate in decibels. :type prior_db: float - :param prior_samples: Prior strength in number of samples + :param prior_samples: Prior strength in number of samples. :type prior_samples: float - :param startup_delay: Default 0.0 s. If provided, this function will accrue - statistics for the first startup_delay seconds before - applying online normalization. + :param startup_delay: Default 0.0 s. If provided, this function will + accrue statistics for the first startup_delay + seconds before applying online normalization. :type startup_delay: float """ - # Estimate total RMS online + # Estimate total RMS online. startup_sample_idx = min(self.num_samples - 1, int(self.sample_rate * startup_delay)) prior_mean_squared = 10.**(prior_db / 10.) prior_sum_of_squares = prior_mean_squared * prior_samples cumsum_of_squares = np.cumsum(self.samples**2) - sample_count = np.arange(len(self)) + 1 + sample_count = np.arange(len(self.num_samples)) + 1 if startup_sample_idx > 0: cumsum_of_squares[:startup_sample_idx] = \ cumsum_of_squares[startup_sample_idx] @@ -317,42 +315,40 @@ class AudioSegment(object): mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) / (sample_count + prior_samples)) rms_estimate_db = 10 * np.log10(mean_squared_estimate) - # Compute required time-varying gain + # Compute required time-varying gain. gain_db = target_db - rms_estimate_db self.apply_gain(gain_db) def resample(self, target_sample_rate, quality='sinc_medium'): - """Resample audio segment. This resamples the audio to a new - sample rate. + """Resample the audio to a target sample rate. Note that this is an in-place transformation. - :param target_sample_rate: Target sample rate + :param target_sample_rate: Target sample rate. :type target_sample_rate: int :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. Sets resampling speed/quality tradeoff. 
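The online Bayesian normalization above treats the prior as prior_samples fake observations whose mean square corresponds to prior_db; the running estimate then yields a per-sample gain. A standalone sketch of that computation:

```
import numpy as np


def online_gain_db(samples, target_db, prior_db, prior_samples):
    prior_mean_sq = 10.0 ** (prior_db / 10.0)
    prior_sum_sq = prior_mean_sq * prior_samples
    cum_sum_sq = np.cumsum(samples ** 2)
    count = np.arange(len(samples)) + 1.0
    mean_sq_est = (cum_sum_sq + prior_sum_sq) / (count + prior_samples)
    rms_est_db = 10.0 * np.log10(mean_sq_est)
    return target_db - rms_est_db              # time-varying gain in dB


x = np.random.randn(1000) * 0.1
g = online_gain_db(x, target_db=-20.0, prior_db=-25.0, prior_samples=100.0)
print(g[0], g[-1])   # early gains lean on the prior, later ones on the data
```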
See http://www.mega-nerd.com/SRC/api_misc.html#Converters - :type quality: basestring + :type quality: str """ resample_ratio = target_sample_rate / self._sample_rate - new_samples = scikits.samplerate.resample( + self._samples = scikits.samplerate.resample( self._samples, r=resample_ratio, type=quality) - self._samples = new_samples self._sample_rate = target_sample_rate def pad_silence(self, duration, sides='both'): - """Pads this audio sample with a period of silence. + """Pad this audio sample with a period of silence. Note that this is an in-place transformation. - :param duration: Length of silence in seconds to pad + :param duration: Length of silence in seconds to pad. :type duration: float - :param sides: Position for padding - 'beginning' - adds silence in the beginning - 'end' - adds silence in the end + :param sides: Position for padding: + 'beginning' - adds silence in the beginning; + 'end' - adds silence in the end; 'both' - adds silence in both the beginning and the end. :type sides: str - :raises ValueError: If the sides not surport + :raises ValueError: If sides is not supported. """ if duration == 0.0: return self @@ -367,51 +363,41 @@ class AudioSegment(object): else: raise ValueError("Unknown value for the kwarg %s" % sides) self._samples = padded._samples - self._sample_rate = padded._sample_rate def subsegment(self, start_sec=None, end_sec=None): """Return new AudioSegment containing audio between given boundaries. - :param start_sec: Beginning of subsegment in seconds, - (beginning of segment if None). + :param start_sec: Beginning of subsegment in seconds. :type start_sec: float - :param end_sec: End of subsegment in seconds, - (end of segment if None). + :param end_sec: End of subsegment in seconds. :type end_sec: float - :return: New AudioSegment containing specified subsegment. - :rtype: AudioSegment """ start_sec = 0.0 if start_sec is None else start_sec end_sec = self.duration if end_sec is None else end_sec - # negative boundaries are relative to end of segment if start_sec < 0.0: start_sec = self.duration + start_sec if end_sec < 0.0: end_sec = self.duration + end_sec start_sample = int(round(start_sec * self._sample_rate)) end_sample = int(round(end_sec * self._sample_rate)) - samples = self._samples[start_sample:end_sample] - return type(self)(samples, sample_rate=self._sample_rate) + self._samples = self._samples[start_sample:end_sample] def random_subsegment(self, subsegment_length, rng=None): """Return a random subsegment of a specified length in seconds. :param subsegment_length: Subsegment length in seconds. :type subsegment_length: float - :param rng: Random number generator state + :param rng: Random number generator state. :type rng: random.Random - :return: New AudioSegment containing random subsegment - of original segment - :rtype: AudioSegment - :raises ValueError: If the length of subsegment greater than origineal - segemnt. + :raises ValueError: If the length of subsegment greater than + origineal segemnt. """ rng = random.Random() if rng is None else rng if subsegment_length > self.duration: raise ValueError("Length of subsegment must not be greater " "than original segment.") start_time = rng.uniform(0.0, self.duration - subsegment_length) - return self.subsegment(start_time, start_time + subsegment_length) + self.subsegment(start_time, start_time + subsegment_length) def convolve(self, impulse_segment, allow_resample=False): """Convolve this audio segment with the given filter. 
@@ -420,10 +406,10 @@ class AudioSegment(object): :param impulse_segment: Impulse response segments. :type impulse_segment: AudioSegment - :param allow_resample: indicates whether resampling is allowed when - the impulse_segment has a different sample - rate from this signal. - :type allow_resample: boolean + :param allow_resample: Indicates whether resampling is allowed when + the impulse_segment has a different sample + rate from this signal. + :type allow_resample: bool :raises ValueError: If the sample rate is not match between two audio segments and resample is not allowed. """ @@ -443,9 +429,10 @@ class AudioSegment(object): :param impulse_segment: Impulse response segments. :type impulse_segment: AudioSegment - :param allow_resample: indicates whether resampling is allowed when - the impulse_segment has a different sample rate from this signal. - :type allow_resample: boolean + :param allow_resample: Indicates whether resampling is allowed when + the impulse_segment has a different sample + rate from this signal. + :type allow_resample: bool """ target_db = self.rms_db self.convolve(impulse_segment, allow_resample=allow_resample) @@ -465,42 +452,36 @@ class AudioSegment(object): :type noise: AudioSegment :param snr_dB: Signal-to-Noise Ratio, in decibels. :type snr_dB: float - :param allow_downsampling: whether to allow the noise signal to be downsampled - to match the base signal sample rate. - :type allow_downsampling: boolean - :param max_gain_db: Maximum amount of gain to apply to noise signal before - adding it in. This is to prevent attempting to apply infinite - gain to a zero signal. + :param allow_downsampling: Whether to allow the noise signal to be + downsampled to match the base signal sample + rate. + :type allow_downsampling: bool + :param max_gain_db: Maximum amount of gain to apply to noise signal + before adding it in. This is to prevent attempting + to apply infinite gain to a zero signal. :type max_gain_db: float :param rng: Random number generator state. - :type rng: random.Random - :raises ValueError: If the sample rate does not match between the two audio segments - and resample is not allowed, or if the duration of noise segments - is shorter than original audio segments. + :type rng: None|random.Random + :raises ValueError: If the sample rate does not match between the two + audio segments and resample is not allowed, or if + the duration of noise segments is shorter than + original audio segments. """ rng = random.Random() if rng is None else rng if allow_downsampling and noise.sample_rate > self.sample_rate: noise = noise.resample(self.sample_rate) if noise.sample_rate != self.sample_rate: - raise ValueError("Noise sample rate (%d Hz) is not equal to " - "base signal sample rate (%d Hz)." % - (noise.sample_rate, self.sample_rate)) + raise ValueError("Noise sample rate (%d Hz) is not equal to base " + "signal sample rate (%d Hz)." % (noise.sample_rate, + self.sample_rate)) if noise.duration < self.duration: - raise ValueError("Noise signal (%f sec) must be at " - "least as long as base signal (%f sec)." % + raise ValueError("Noise signal (%f sec) must be at least as long as" + " base signal (%f sec)." 
% (noise.duration, self.duration)) - noise_gain_db = self.rms_db - noise.rms_db - snr_dB - noise_gain_db = min(max_gain_db, noise_gain_db) - noise_subsegment = noise.random_subsegment(self.duration, rng=rng) - output = self + self.tranform_noise(noise_subsegment, noise_gain_db) - self._samples = output._samples - self._sample_rate = output._sample_rate - - def tranform_noise(self, noise_subsegment, noise_gain_db): - """ tranform noise file - """ - return type(self)(noise_subsegment._samples * (10.**( - noise_gain_db / 20.)), noise_subsegment._sample_rate) + noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db) + noise.random_subsegment(self.duration, rng=rng) + noise.apply_gain(noise_gain_db) + self.superimposed(noise) @property def samples(self): @@ -571,7 +552,7 @@ class AudioSegment(object): Audio sample type is usually integer or float-point. For integer type, float32 will be rescaled from [-1, 1] to the maximum range supported by the integer type. - + This is for writing a audio file. """ dtype = np.dtype(dtype) diff --git a/data_utils/speech.py b/data_utils/speech.py index 5d1fc15a7..443df68c6 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -67,20 +67,20 @@ class SpeechSegment(AudioSegment): @classmethod def concatenate(cls, *segments): - """Concatenate an arbitrary number of audio segments together. + """Concatenate an arbitrary number of speech segments together. - :param *segments: Input speech segments + :param *segments: Input speech segments. :type *segments: SpeechSegment :return: Speech segment instance. :rtype: SpeechSegment - :raises ValueError: If number of segments is zero, or if sample_rate - not match between two audio segments - :raises TypeError: If item of segments is not Audiosegment instance + :raises ValueError: If the number of segments is zero, or if the + sample_rate of any two segments does not match. + :raises TypeError: If every segment in is not Audiosegment instance. """ - # Perform basic sanity-checks. if len(segments) == 0: raise ValueError("No audio segments are given to concatenate.") sample_rate = segments[0]._sample_rate + transcripts = "" for seg in segments: if sample_rate != seg._sample_rate: raise ValueError("Can't concatenate segments with " @@ -88,8 +88,49 @@ class SpeechSegment(AudioSegment): if type(seg) is not cls: raise TypeError("Only speech segments of the same type " "instance can be concatenated.") + transcripts += seg._transcript samples = np.concatenate([seg.samples for seg in segments]) - return cls(samples, sample_rate, seg._transcript) + return cls(samples, sample_rate, transcripts) + + @classmethod + def slice_from_file(cls, filepath, start=None, end=None, transcript=""): + """Loads a small section of an speech without having to load + the entire file into the memory which can be incredibly wasteful. + + :param filepath: Filepath or file object to audio file. + :type filepath: basestring|file + :param start: Start time in seconds. If start is negative, it wraps + around from the end. If not provided, this function + reads from the very beginning. + :type start: float + :param end: End time in seconds. If end is negative, it wraps around + from the end. If not provided, the default behvaior is + to read to the end of the file. + :type end: float + :param transcript: Transcript text for the speech. if not provided, + the defaults is an empty string. + :type transript: basestring + :return: SpeechSegment instance of the specified slice of the input + speech file. 
+ :rtype: SpeechSegment + """ + audio = Audiosegment.slice_from_file(filepath, start, end) + return cls(audio.samples, audio.sample_rate, transcripts) + + @classmethod + def make_silence(cls, duration, sample_rate): + """Creates a silent speech segment of the given duration and + sample rate. + + :param duration: Length of silence in seconds. + :type duration: float + :param sample_rate: Sample rate. + :type sample_rate: float + :return: Silence of the given duration. + :rtype: AudioSegment + """ + audio = AudioSegment.make_silence(duration, sample_rate) + return cls(audio.samples, audio.sample_rate, "") @property def transcript(self): From 26eb54eb37e0515f863243c133fe0a72bfd5c6af Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 18 Jun 2017 14:31:57 +0800 Subject: [PATCH 044/335] Follow comments. --- error_rate.py | 16 ++++++++------- tests/test_error_rate.py | 44 +++++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/error_rate.py b/error_rate.py index 2bb637114..08fe12558 100644 --- a/error_rate.py +++ b/error_rate.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- -""" - This module provides functions to calculate error rate in different level. - e.g. wer for word-level, cer for char-level. +"""This module provides functions to calculate error rate in different level. +e.g. wer for word-level, cer for char-level. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import numpy as np @@ -42,8 +44,7 @@ def levenshtein_distance(ref, hyp): def wer(reference, hypothesis, ignore_case=False, delimiter=' '): - """ - Calculate word error rate (WER). WER compares reference text and + """Calculate word error rate (WER). WER compares reference text and hypothesis text in word-level. WER is defined as: .. math:: @@ -71,6 +72,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): :type delimiter: char :return: Word error rate. :rtype: float + :raises ValueError: If reference length is zero. """ if ignore_case == True: reference = reference.lower() @@ -88,8 +90,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): def cer(reference, hypothesis, ignore_case=False): - """ - Calculate charactor error rate (CER). CER compares reference text and + """Calculate charactor error rate (CER). CER compares reference text and hypothesis text in char-level. CER is defined as: .. math:: @@ -117,6 +118,7 @@ def cer(reference, hypothesis, ignore_case=False): :type ignore_case: bool :return: Character error rate. :rtype: float + :raises ValueError: If reference length is zero. 
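To make the WER definition above concrete, here is a self-contained sketch that computes a word-level edit distance and divides by the reference length; the module's own distance lives in levenshtein_distance, which this hunk does not show.

```
def word_error_rate(reference, hypothesis):
    ref, hyp = reference.split(), hypothesis.split()
    if len(ref) == 0:
        raise ValueError("Reference should contain at least one word.")
    # Classic dynamic-programming edit distance over words.
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        cur = [i] + [0] * len(hyp)
        for j, h in enumerate(hyp, 1):
            cur[j] = min(prev[j] + 1,             # deletion
                         cur[j - 1] + 1,          # insertion
                         prev[j - 1] + (r != h))  # substitution
        prev = cur
    return float(prev[-1]) / len(ref)


print(word_error_rate("i left the phone upstairs last night",
                      "i left a phone upstairs night"))   # 2 edits / 7 words
```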
""" if ignore_case == True: reference = reference.lower() diff --git a/tests/test_error_rate.py b/tests/test_error_rate.py index bb6dca30a..57a6ccd68 100644 --- a/tests/test_error_rate.py +++ b/tests/test_error_rate.py @@ -1,29 +1,63 @@ # -*- coding: utf-8 -*- +"""Test error rate.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import unittest -import sys -sys.path.append('..') import error_rate class TestParse(unittest.TestCase): - def test_wer(self): + def test_wer_1(self): ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night' word_error_rate = error_rate.wer(ref, hyp) self.assertTrue(abs(word_error_rate - 0.769230769231) < 1e-6) - def test_cer_en(self): + def test_wer_2(self): + ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' + word_error_rate = error_rate.wer(ref, ref) + self.assertEqual(word_error_rate, 0.0) + + def test_wer_3(self): + ref = ' ' + hyp = 'Hypothesis sentence' + try: + word_error_rate = error_rate.wer(ref, hyp) + except Exception as e: + self.assertTrue(isinstance(e, ValueError)) + + def test_cer_1(self): ref = 'werewolf' hyp = 'weae wolf' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.25) < 1e-6) - def test_cer_zh(self): + def test_cer_2(self): + ref = 'werewolf' + char_error_rate = error_rate.cer(ref, ref) + self.assertEqual(char_error_rate, 0.0) + + def test_cer_3(self): ref = u'我是中国人' hyp = u'我是 美洲人' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) + def test_cer_4(self): + ref = u'我是中国人' + char_error_rate = error_rate.cer(ref, ref) + self.assertFalse(char_error_rate, 0.0) + + def test_cer_5(self): + ref = '' + hyp = 'Hypothesis' + try: + char_error_rate = error_rate.cer(ref, hyp) + except Exception as e: + self.assertTrue(isinstance(e, ValueError)) + if __name__ == '__main__': unittest.main() From b8341da63dfa2baccff73c197e0e3dae336ef4de Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 16:23:30 +0800 Subject: [PATCH 045/335] add audio augmentation --- data_utils/audio.py | 3 ++- data_utils/speech.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 1f75da8ac..3c671b69b 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -88,7 +88,8 @@ class AudioSegment(object): :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every segment in is not Audiosegment instance. + :raises TypeError: If every item in segments is not Audiosegment + instance. """ # Perform basic sanity-checks. if len(segments) == 0: diff --git a/data_utils/speech.py b/data_utils/speech.py index 443df68c6..66f22b247 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -75,7 +75,8 @@ class SpeechSegment(AudioSegment): :rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every segment in is not Audiosegment instance. + :raises TypeError: If every item in segments is not Audiosegment + instance. 
""" if len(segments) == 0: raise ValueError("No audio segments are given to concatenate.") From ff01d048d39854abf075a81320bddddcbc62f1f0 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 18 Jun 2017 16:36:52 +0800 Subject: [PATCH 046/335] final refining on old data provider: enable pruning & add evaluation & code cleanup --- decoder.py | 84 +++++++++++++++------ evaluate.py | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++++ infer.py | 40 ++++++---- tune.py | 73 +++++++++--------- 4 files changed, 339 insertions(+), 72 deletions(-) create mode 100644 evaluate.py diff --git a/decoder.py b/decoder.py index 824ac9701..2ee89cbd0 100755 --- a/decoder.py +++ b/decoder.py @@ -5,7 +5,6 @@ import os from itertools import groupby import numpy as np -import copy import kenlm import multiprocessing @@ -73,11 +72,25 @@ class Scorer(object): return len(words) # execute evaluation - def __call__(self, sentence): + def __call__(self, sentence, log=False): + """ + Evaluation function + + :param sentence: The input sentence for evalutation + :type sentence: basestring + :param log: Whether return the score in log representation. + :type log: bool + :return: Evaluation score, in the decimal or log. + :rtype: float + """ lm = self.language_model_score(sentence) word_cnt = self.word_count(sentence) - score = np.power(lm, self._alpha) \ - * np.power(word_cnt, self._beta) + if log == False: + score = np.power(lm, self._alpha) \ + * np.power(word_cnt, self._beta) + else: + score = self._alpha * np.log(lm) \ + + self._beta * np.log(word_cnt) return score @@ -85,13 +98,14 @@ def ctc_beam_search_decoder(probs_seq, beam_size, vocabulary, blank_id=0, + cutoff_prob=1.0, ext_scoring_func=None, nproc=False): ''' Beam search decoder for CTC-trained network, using beam search with width beam_size to find many paths to one label, return beam_size labels in - the order of probabilities. The implementation is based on Prefix Beam - Search(https://arxiv.org/abs/1408.2873), and the unclear part is + the descending order of probabilities. The implementation is based on Prefix + Beam Search(https://arxiv.org/abs/1408.2873), and the unclear part is redesigned, need to be verified. :param probs_seq: 2-D list with length num_time_steps, each element @@ -102,22 +116,25 @@ def ctc_beam_search_decoder(probs_seq, :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list + :param blank_id: ID of blank, default 0. + :type blank_id: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float :param ext_scoring_func: External defined scoring function for partially decoded sentence, e.g. word count and language model. :type external_scoring_function: function - :param blank_id: id of blank, default 0. - :type blank_id: int :param nproc: Whether the decoder used in multiprocesses. :type nproc: bool - :return: Decoding log probability and result string. + :return: Decoding log probabilities and result sentences in descending order. 
:rtype: list ''' # dimension check for prob_list in probs_seq: if not len(prob_list) == len(vocabulary) + 1: - raise ValueError("probs dimension mismatchedd with vocabulary") + raise ValueError("probs dimension mismatched with vocabulary") num_time_steps = len(probs_seq) # blank_id check @@ -137,19 +154,35 @@ def ctc_beam_search_decoder(probs_seq, probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0} ## extend prefix in loop - for time_step in range(num_time_steps): + for time_step in xrange(num_time_steps): # the set containing candidate prefixes prefix_set_next = {} probs_b_cur, probs_nb_cur = {}, {} + prob = probs_seq[time_step] + prob_idx = [[i, prob[i]] for i in xrange(len(prob))] + cutoff_len = len(prob_idx) + #If pruning is enabled + if (cutoff_prob < 1.0): + prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True) + cutoff_len = 0 + cum_prob = 0.0 + for i in xrange(len(prob_idx)): + cum_prob += prob_idx[i][1] + cutoff_len += 1 + if cum_prob >= cutoff_prob: + break + prob_idx = prob_idx[0:cutoff_len] + for l in prefix_set_prev: - prob = probs_seq[time_step] if not prefix_set_next.has_key(l): probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0 - # extend prefix by travering vocabulary - for c in range(0, probs_dim): + # extend prefix by travering prob_idx + for index in xrange(cutoff_len): + c, prob_c = prob_idx[index][0], prob_idx[index][1] + if c == blank_id: - probs_b_cur[l] += prob[c] * ( + probs_b_cur[l] += prob_c * ( probs_b_prev[l] + probs_nb_prev[l]) else: last_char = l[-1] @@ -159,18 +192,18 @@ def ctc_beam_search_decoder(probs_seq, probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0 if new_char == last_char: - probs_nb_cur[l_plus] += prob[c] * probs_b_prev[l] - probs_nb_cur[l] += prob[c] * probs_nb_prev[l] + probs_nb_cur[l_plus] += prob_c * probs_b_prev[l] + probs_nb_cur[l] += prob_c * probs_nb_prev[l] elif new_char == ' ': if (ext_scoring_func is None) or (len(l) == 1): score = 1.0 else: prefix = l[1:] score = ext_scoring_func(prefix) - probs_nb_cur[l_plus] += score * prob[c] * ( + probs_nb_cur[l_plus] += score * prob_c * ( probs_b_prev[l] + probs_nb_prev[l]) else: - probs_nb_cur[l_plus] += prob[c] * ( + probs_nb_cur[l_plus] += prob_c * ( probs_b_prev[l] + probs_nb_prev[l]) # add l_plus into prefix_set_next prefix_set_next[l_plus] = probs_nb_cur[ @@ -203,6 +236,7 @@ def ctc_beam_search_decoder_nproc(probs_split, beam_size, vocabulary, blank_id=0, + cutoff_prob=1.0, ext_scoring_func=None, num_processes=None): ''' @@ -216,16 +250,19 @@ def ctc_beam_search_decoder_nproc(probs_split, :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list + :param blank_id: ID of blank, default 0. + :type blank_id: int + :param cutoff_prob: Cutoff probability in pruning, + default 0, no pruning. + :type cutoff_prob: float :param ext_scoring_func: External defined scoring function for partially decoded sentence, e.g. word count and language model. :type external_scoring_function: function - :param blank_id: id of blank, default 0. - :type blank_id: int :param num_processes: Number of processes, default None, equal to the number of CPUs. :type num_processes: int - :return: Decoding log probability and result string. + :return: Decoding log probabilities and result sentences in descending order. 
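The cutoff_prob pruning used above keeps, at each time step, only the most probable symbols whose cumulative probability reaches cutoff_prob. A standalone sketch of that step:

```
import numpy as np


def prune(prob_step, cutoff_prob=0.99):
    """Return (index, prob) pairs covering at least cutoff_prob of the mass."""
    order = np.argsort(prob_step)[::-1]        # most probable first
    kept, cum = [], 0.0
    for idx in order:
        kept.append((int(idx), float(prob_step[idx])))
        cum += prob_step[idx]
        if cum >= cutoff_prob:
            break
    return kept


step = np.array([0.70, 0.15, 0.10, 0.04, 0.01])
print(prune(step, cutoff_prob=0.9))   # 0.70 + 0.15 < 0.9, so three entries are kept
```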
:rtype: list ''' @@ -243,7 +280,8 @@ def ctc_beam_search_decoder_nproc(probs_split, pool = multiprocessing.Pool(processes=num_processes) results = [] for i, probs_list in enumerate(probs_split): - args = (probs_list, beam_size, vocabulary, blank_id, None, nproc) + args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, None, + nproc) results.append(pool.apply_async(ctc_beam_search_decoder, args)) pool.close() diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 000000000..7c05a309d --- /dev/null +++ b/evaluate.py @@ -0,0 +1,214 @@ +""" + Evaluation for a simplifed version of Baidu DeepSpeech2 model. +""" + +import paddle.v2 as paddle +import distutils.util +import argparse +import gzip +from audio_data_utils import DataGenerator +from model import deep_speech2 +from decoder import * +from error_rate import wer + +parser = argparse.ArgumentParser( + description='Simplified version of DeepSpeech2 evaluation.') +parser.add_argument( + "--num_samples", + default=100, + type=int, + help="Number of samples for evaluation. (default: %(default)s)") +parser.add_argument( + "--num_conv_layers", + default=2, + type=int, + help="Convolution layer number. (default: %(default)s)") +parser.add_argument( + "--num_rnn_layers", + default=3, + type=int, + help="RNN layer number. (default: %(default)s)") +parser.add_argument( + "--rnn_layer_size", + default=512, + type=int, + help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gpu", + default=True, + type=distutils.util.strtobool, + help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--decode_method", + default='beam_search_nproc', + type=str, + help="Method for ctc decoding, best_path, " + "beam_search or beam_search_nproc. (default: %(default)s)") +parser.add_argument( + "--language_model_path", + default="./data/1Billion.klm", + type=str, + help="Path for language model. (default: %(default)s)") +parser.add_argument( + "--alpha", + default=0.26, + type=float, + help="Parameter associated with language model. (default: %(default)f)") +parser.add_argument( + "--beta", + default=0.1, + type=float, + help="Parameter associated with word count. (default: %(default)f)") +parser.add_argument( + "--cutoff_prob", + default=0.99, + type=float, + help="The cutoff probability of pruning" + "in beam search. (default: %(default)f)") +parser.add_argument( + "--beam_size", + default=500, + type=int, + help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--normalizer_manifest_path", + default='data/manifest.libri.train-clean-100', + type=str, + help="Manifest path for normalizer. (default: %(default)s)") +parser.add_argument( + "--decode_manifest_path", + default='data/manifest.libri.test-clean', + type=str, + help="Manifest path for decoding. (default: %(default)s)") +parser.add_argument( + "--model_filepath", + default='./params.tar.gz', + type=str, + help="Model filepath. (default: %(default)s)") +parser.add_argument( + "--vocab_filepath", + default='data/eng_vocab.txt', + type=str, + help="Vocabulary filepath. (default: %(default)s)") +args = parser.parse_args() + + +def evaluate(): + """ + Evaluate on whole test data for DeepSpeech2. 
+ """ + # initialize data generator + data_generator = DataGenerator( + vocab_filepath=args.vocab_filepath, + normalizer_manifest_path=args.normalizer_manifest_path, + normalizer_num_samples=200, + max_duration=20.0, + min_duration=0.0, + stride_ms=10, + window_ms=20) + + # create network config + dict_size = data_generator.vocabulary_size() + vocab_list = data_generator.vocabulary_list() + audio_data = paddle.layer.data( + name="audio_spectrogram", + height=161, + width=2000, + type=paddle.data_type.dense_vector(322000)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(dict_size)) + output_probs = deep_speech2( + audio_data=audio_data, + text_data=text_data, + dict_size=dict_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size, + is_inference=True) + + # load parameters + parameters = paddle.parameters.Parameters.from_tar( + gzip.open(args.model_filepath)) + + # prepare infer data + feeding = data_generator.data_name_feeding() + test_batch_reader = data_generator.batch_reader_creator( + manifest_path=args.decode_manifest_path, + batch_size=args.num_samples, + padding_to=2000, + flatten=True, + sort_by_duration=False, + shuffle=False) + + # define inferer + inferer = paddle.inference.Inference( + output_layer=output_probs, parameters=parameters) + + # initialize external scorer for beam search decoding + if args.decode_method == 'beam_search' or \ + args.decode_method == 'beam_search_nproc': + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + + wer_counter, wer_sum = 0, 0.0 + for infer_data in test_batch_reader(): + # run inference + infer_results = inferer.infer(input=infer_data) + num_steps = len(infer_results) / len(infer_data) + probs_split = [ + infer_results[i * num_steps:(i + 1) * num_steps] + for i in xrange(0, len(infer_data)) + ] + + # decode and print + # best path decode + if args.decode_method == "best_path": + for i, probs in enumerate(probs_split): + output_transcription = ctc_decode( + probs_seq=probs, vocabulary=vocab_list, method="best_path") + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + wer_sum += wer(target_transcription, output_transcription) + wer_counter += 1 + # beam search decode in single process + elif args.decode_method == "beam_search": + for i, probs in enumerate(probs_split): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + beam_search_result = ctc_beam_search_decoder( + probs_seq=probs, + vocabulary=vocab_list, + beam_size=args.beam_size, + blank_id=len(vocab_list), + ext_scoring_func=ext_scorer, + cutoff_prob=args.cutoff_prob, ) + wer_sum += wer(target_transcription, beam_search_result[0][1]) + wer_counter += 1 + # beam search using multiple processes + elif args.decode_method == "beam_search_nproc": + beam_search_nproc_results = ctc_beam_search_decoder_nproc( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=args.beam_size, + blank_id=len(vocab_list), + ext_scoring_func=ext_scorer, + cutoff_prob=args.cutoff_prob, ) + for i, beam_search_result in enumerate(beam_search_nproc_results): + target_transcription = ''.join( + [vocab_list[index] for index in infer_data[i][1]]) + wer_sum += wer(target_transcription, beam_search_result[0][1]) + wer_counter += 1 + else: + raise ValueError("Decoding method [%s] is not supported." 
% method) + + print("Cur WER = %f" % (wer_sum / wer_counter)) + print("Final WER = %f" % (wer_sum / wer_counter)) + + +def main(): + paddle.init(use_gpu=args.use_gpu, trainer_count=1) + evaluate() + + +if __name__ == '__main__': + main() diff --git a/infer.py b/infer.py index bb9dfa0a6..64fe1524e 100644 --- a/infer.py +++ b/infer.py @@ -9,14 +9,14 @@ import gzip from audio_data_utils import DataGenerator from model import deep_speech2 from decoder import * -import kenlm from error_rate import wer +import time parser = argparse.ArgumentParser( description='Simplified version of DeepSpeech2 inference.') parser.add_argument( "--num_samples", - default=10, + default=100, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -46,7 +46,7 @@ parser.add_argument( help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", - default='data/manifest.libri.test-clean', + default='data/manifest.libri.test-100sample', type=str, help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( @@ -63,11 +63,13 @@ parser.add_argument( "--decode_method", default='beam_search_nproc', type=str, - help="Method for ctc decoding, best_path, beam_search or beam_search_nproc. (default: %(default)s)" -) + help="Method for ctc decoding:" + " best_path," + " beam_search, " + " or beam_search_nproc. (default: %(default)s)") parser.add_argument( "--beam_size", - default=50, + default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( @@ -82,14 +84,20 @@ parser.add_argument( help="Path for language model. (default: %(default)s)") parser.add_argument( "--alpha", - default=0.0, + default=0.26, type=float, help="Parameter associated with language model. (default: %(default)f)") parser.add_argument( "--beta", - default=0.0, + default=0.1, type=float, help="Parameter associated with word count. (default: %(default)f)") +parser.add_argument( + "--cutoff_prob", + default=0.99, + type=float, + help="The cutoff probability of pruning" + "in beam search. 
(default: %(default)f)") args = parser.parse_args() @@ -154,6 +162,7 @@ def infer(): ## decode and print # best path decode wer_sum, wer_counter = 0, 0 + total_time = 0.0 if args.decode_method == "best_path": for i, probs in enumerate(probs_split): target_transcription = ''.join( @@ -177,11 +186,12 @@ def infer(): probs_seq=probs, vocabulary=vocab_list, beam_size=args.beam_size, - ext_scoring_func=ext_scorer, - blank_id=len(vocab_list)) + blank_id=len(vocab_list), + cutoff_prob=args.cutoff_prob, + ext_scoring_func=ext_scorer, ) print("\nTarget Transcription:\t%s" % target_transcription) - for index in range(args.num_results_per_sample): + for index in xrange(args.num_results_per_sample): result = beam_search_result[index] #output: index, log prob, beam result print("Beam %d: %f \t%s" % (index, result[0], result[1])) @@ -190,21 +200,21 @@ def infer(): wer_counter += 1 print("cur wer = %f , average wer = %f" % (wer_cur, wer_sum / wer_counter)) - # beam search using multiple processes elif args.decode_method == "beam_search_nproc": ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) beam_search_nproc_results = ctc_beam_search_decoder_nproc( probs_split=probs_split, vocabulary=vocab_list, beam_size=args.beam_size, - ext_scoring_func=ext_scorer, - blank_id=len(vocab_list)) + blank_id=len(vocab_list), + cutoff_prob=args.cutoff_prob, + ext_scoring_func=ext_scorer, ) for i, beam_search_result in enumerate(beam_search_nproc_results): target_transcription = ''.join( [vocab_list[index] for index in infer_data[i][1]]) print("\nTarget Transcription:\t%s" % target_transcription) - for index in range(args.num_results_per_sample): + for index in xrange(args.num_results_per_sample): result = beam_search_result[index] #output: index, log prob, beam result print("Beam %d: %f \t%s" % (index, result[0], result[1])) diff --git a/tune.py b/tune.py index 3eb826489..58a8a0d1b 100644 --- a/tune.py +++ b/tune.py @@ -1,5 +1,5 @@ """ - Tune parameters for beam search decoder in Deep Speech 2. + Parameters tuning for beam search decoder in Deep Speech 2. """ import paddle.v2 as paddle @@ -12,7 +12,7 @@ from decoder import * from error_rate import wer parser = argparse.ArgumentParser( - description='Parameters tuning script for ctc beam search decoder in Deep Speech 2.' + description='Parameters tuning for ctc beam search decoder in Deep Speech 2.' ) parser.add_argument( "--num_samples", @@ -82,34 +82,40 @@ parser.add_argument( help="Path for language model. (default: %(default)s)") parser.add_argument( "--alpha_from", - default=0.0, + default=0.1, type=float, - help="Where alpha starts from, <= alpha_to. (default: %(default)f)") + help="Where alpha starts from. (default: %(default)f)") parser.add_argument( - "--alpha_stride", - default=0.001, - type=float, - help="Step length for varying alpha. (default: %(default)f)") + "--num_alphas", + default=14, + type=int, + help="Number of candidate alphas. (default: %(default)d)") parser.add_argument( "--alpha_to", - default=0.01, + default=0.36, type=float, - help="Where alpha ends with, >= alpha_from. (default: %(default)f)") + help="Where alpha ends with. (default: %(default)f)") parser.add_argument( "--beta_from", - default=0.0, + default=0.05, type=float, - help="Where beta starts from, <= beta_to. (default: %(default)f)") + help="Where beta starts from. (default: %(default)f)") parser.add_argument( - "--beta_stride", - default=0.01, + "--num_betas", + default=20, type=float, - help="Step length for varying beta. 
(default: %(default)f)") + help="Number of candidate betas. (default: %(default)d)") parser.add_argument( "--beta_to", - default=0.0, + default=1.0, type=float, - help="Where beta ends with, >= beta_from. (default: %(default)f)") + help="Where beta ends with. (default: %(default)f)") +parser.add_argument( + "--cutoff_prob", + default=0.99, + type=float, + help="The cutoff probability of pruning" + "in beam search. (default: %(default)f)") args = parser.parse_args() @@ -118,15 +124,11 @@ def tune(): Tune parameters alpha and beta on one minibatch. """ - if not args.alpha_from <= args.alpha_to: - raise ValueError("alpha_from <= alpha_to doesn't satisfy!") - if not args.alpha_stride > 0: - raise ValueError("alpha_stride shouldn't be negative!") + if not args.num_alphas >= 0: + raise ValueError("num_alphas must be non-negative!") - if not args.beta_from <= args.beta_to: - raise ValueError("beta_from <= beta_to doesn't satisfy!") - if not args.beta_stride > 0: - raise ValueError("beta_stride shouldn't be negative!") + if not args.num_betas >= 0: + raise ValueError("num_betas must be non-negative!") # initialize data generator data_generator = DataGenerator( @@ -171,6 +173,7 @@ def tune(): flatten=True, sort_by_duration=False, shuffle=False) + # get one batch data for tuning infer_data = test_batch_reader().next() # run inference @@ -182,11 +185,12 @@ def tune(): for i in xrange(0, len(infer_data)) ] - cand_alpha = np.arange(args.alpha_from, args.alpha_to + args.alpha_stride, - args.alpha_stride) - cand_beta = np.arange(args.beta_from, args.beta_to + args.beta_stride, - args.beta_stride) - params_grid = [(alpha, beta) for alpha in cand_alpha for beta in cand_beta] + # create grid for search + cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) + cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) + params_grid = [(alpha, beta) for alpha in cand_alphas + for beta in cand_betas] + ## tune parameters in loop for (alpha, beta) in params_grid: wer_sum, wer_counter = 0, 0 @@ -200,8 +204,9 @@ def tune(): probs_seq=probs, vocabulary=vocab_list, beam_size=args.beam_size, - ext_scoring_func=ext_scorer, - blank_id=len(vocab_list)) + blank_id=len(vocab_list), + cutoff_prob=args.cutoff_prob, + ext_scoring_func=ext_scorer, ) wer_sum += wer(target_transcription, beam_search_result[0][1]) wer_counter += 1 # beam search using multiple processes @@ -210,9 +215,9 @@ def tune(): probs_split=probs_split, vocabulary=vocab_list, beam_size=args.beam_size, - ext_scoring_func=ext_scorer, + cutoff_prob=args.cutoff_prob, blank_id=len(vocab_list), - num_processes=1) + ext_scoring_func=ext_scorer, ) for i, beam_search_result in enumerate(beam_search_nproc_results): target_transcription = ''.join( [vocab_list[index] for index in infer_data[i][1]]) From 107f8b89ae5f961748b89dfe1153cf4ef0288c6b Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 16:47:09 +0800 Subject: [PATCH 047/335] add audio augmentation --- data_utils/audio.py | 6 +++--- data_utils/speech.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 3c671b69b..1ad20bf32 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -88,7 +88,7 @@ class AudioSegment(object): :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. 
- :raises TypeError: If every item in segments is not Audiosegment + :raises TypeError: If every item in segments is not AudioSegment instance. """ # Perform basic sanity-checks. @@ -296,7 +296,7 @@ class AudioSegment(object): :type prior_db: float :param prior_samples: Prior strength in number of samples. :type prior_samples: float - :param startup_delay: Default 0.0 s. If provided, this function will + :param startup_delay: Default 0.0s. If provided, this function will accrue statistics for the first startup_delay seconds before applying online normalization. :type startup_delay: float @@ -401,7 +401,7 @@ class AudioSegment(object): self.subsegment(start_time, start_time + subsegment_length) def convolve(self, impulse_segment, allow_resample=False): - """Convolve this audio segment with the given filter. + """Convolve this audio segment with the given impulse_segment. Note that this is an in-place transformation. diff --git a/data_utils/speech.py b/data_utils/speech.py index 66f22b247..94ead1e8f 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -75,11 +75,11 @@ class SpeechSegment(AudioSegment): :rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every item in segments is not Audiosegment + :raises TypeError: If every item in segments is not SpeechSegment instance. """ if len(segments) == 0: - raise ValueError("No audio segments are given to concatenate.") + raise ValueError("No speech segments are given to concatenate.") sample_rate = segments[0]._sample_rate transcripts = "" for seg in segments: @@ -116,7 +116,7 @@ class SpeechSegment(AudioSegment): :rtype: SpeechSegment """ audio = Audiosegment.slice_from_file(filepath, start, end) - return cls(audio.samples, audio.sample_rate, transcripts) + return cls(audio.samples, audio.sample_rate, transcript) @classmethod def make_silence(cls, duration, sample_rate): @@ -128,7 +128,7 @@ class SpeechSegment(AudioSegment): :param sample_rate: Sample rate. :type sample_rate: float :return: Silence of the given duration. - :rtype: AudioSegment + :rtype: SpeechSegment """ audio = AudioSegment.make_silence(duration, sample_rate) return cls(audio.samples, audio.sample_rate, "") From 21161b01653b98ea18903ff5bee07a127eee643d Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 17:11:58 +0800 Subject: [PATCH 048/335] add audio file --- data_utils/audio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 1ad20bf32..fd1f93df5 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -87,7 +87,7 @@ class AudioSegment(object): :return: Audio segment instance as concatenating results. :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the - sample_rate of any two segments does not match. + sample_rate of any two segment does not match. :raises TypeError: If every item in segments is not AudioSegment instance. """ @@ -412,7 +412,7 @@ class AudioSegment(object): rate from this signal. :type allow_resample: bool :raises ValueError: If the sample rate is not match between two - audio segments and resample is not allowed. + audio segments when resample is not allowed. """ if allow_resample and self.sample_rate != impulse_segment.sample_rate: impulse_segment = impulse_segment.resample(self.sample_rate) @@ -464,8 +464,8 @@ class AudioSegment(object): :param rng: Random number generator state. 
:type rng: None|random.Random :raises ValueError: If the sample rate does not match between the two - audio segments and resample is not allowed, or if - the duration of noise segments is shorter than + audio segments when downsampling is not allowed, or + if the duration of noise segments is shorter than original audio segments. """ rng = random.Random() if rng is None else rng From 36743d36897082289ab678a744d236699fd69ae3 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 18 Jun 2017 18:11:01 +0800 Subject: [PATCH 049/335] add scoring last word in beam search --- decoder.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/decoder.py b/decoder.py index 2ee89cbd0..37640aff3 100755 --- a/decoder.py +++ b/decoder.py @@ -222,8 +222,11 @@ def ctc_beam_search_decoder(probs_seq, beam_result = [] for (seq, prob) in prefix_set_prev.items(): - if prob > 0.0: + if prob > 0.0 and len(seq) > 1: result = seq[1:] + # score last word by external scorer + if (ext_scoring_func is not None) and (result[-1] != ' '): + prob = prob * ext_scoring_func(result) log_prob = np.log(prob) beam_result.append([log_prob, result]) From 25ce7ebe7b1029e823a9cdb758e808f6a0e0995e Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Sun, 18 Jun 2017 18:22:48 +0800 Subject: [PATCH 050/335] add audio file --- data_utils/audio.py | 4 ++-- data_utils/speech.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index fd1f93df5..37f4f0ba5 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -82,8 +82,8 @@ class AudioSegment(object): def concatenate(cls, *segments): """Concatenate an arbitrary number of audio segments together. - :param *segments: Input audio segments. - :type *segments: AudioSegment + :param *segments: Input audio segments to be concatenated. + :type *segments: tuple of AudioSegment :return: Audio segment instance as concatenating results. :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the diff --git a/data_utils/speech.py b/data_utils/speech.py index 94ead1e8f..00190009a 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -69,8 +69,8 @@ class SpeechSegment(AudioSegment): def concatenate(cls, *segments): """Concatenate an arbitrary number of speech segments together. - :param *segments: Input speech segments. - :type *segments: SpeechSegment + :param *segments: Input speech segments to be concatenated. + :type *segments: tuple of SpeechSegment :return: Speech segment instance. :rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the From ddb2bdc1906223733dd5b1a2ad15a54492681f5b Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Mon, 19 Jun 2017 00:08:05 +0800 Subject: [PATCH 051/335] add audio file --- data_utils/audio.py | 64 ++++++++++++++++++++++++++++++-------------- data_utils/speech.py | 10 +++---- 2 files changed, 49 insertions(+), 25 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 37f4f0ba5..5d02feb60 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -9,6 +9,7 @@ import soundfile import scikits.samplerate from scipy import signal import random +import copy class AudioSegment(object): @@ -87,9 +88,8 @@ class AudioSegment(object): :return: Audio segment instance as concatenating results. :rtype: AudioSegment :raises ValueError: If the number of segments is zero, or if the - sample_rate of any two segment does not match. 
- :raises TypeError: If every item in segments is not AudioSegment - instance. + sample_rate of any segments does not match. + :raises TypeError: If any segment is not AudioSegment instance. """ # Perform basic sanity-checks. if len(segments) == 0: @@ -101,7 +101,7 @@ class AudioSegment(object): "different sample rates") if type(seg) is not cls: raise TypeError("Only audio segments of the same type " - "instance can be concatenated.") + "can be concatenated.") samples = np.concatenate([seg.samples for seg in segments]) return cls(samples, sample_rate) @@ -180,8 +180,7 @@ class AudioSegment(object): @classmethod def make_silence(cls, duration, sample_rate): - """Creates a silent audio segment of the given duration and - sample rate. + """Creates a silent audio segment of the given duration and sample rate. :param duration: Length of silence in seconds. :type duration: float @@ -193,15 +192,17 @@ class AudioSegment(object): samples = np.zeros(int(duration * sample_rate)) return cls(samples, sample_rate) - def superimposed(self, other): + def superimpose(self, other): """Add samples from another segment to those of this segment (sample-wise addition, not segment concatenation). + Note that this is an in-place transformation. + :param other: Segment containing samples to be added in. :type other: AudioSegments :raise TypeError: If type of two segments don't match. - :raise ValueError: If the sample_rate of two segments not equal, or if - the length of segments don't match. + :raise ValueError: If the sample rates of the two segments are not + equal, or if the lengths of segments don't match. """ if type(self) != type(other): raise TypeError("Cannot add segments of different types: %s " @@ -215,7 +216,7 @@ class AudioSegment(object): def to_bytes(self, dtype='float32'): """Create a byte string containing the audio content. - :param dtype: Data type for export samples. Options: 'int16','int32', + :param dtype: Data type for export samples. Options: 'int16', 'int32', 'float32', 'float64'. Default is 'float32'. :type dtype: str :return: Byte string containing audio content. @@ -362,16 +363,20 @@ class AudioSegment(object): elif sides == "both": padded = cls.concatenate(silence, self, silence) else: - raise ValueError("Unknown value for the kwarg %s" % sides) + raise ValueError("Unknown value for the sides %s" % sides) self._samples = padded._samples def subsegment(self, start_sec=None, end_sec=None): - """Return new AudioSegment containing audio between given boundaries. + """Cut the AudioSegment between given boundaries. + + Note that this is an in-place transformation. :param start_sec: Beginning of subsegment in seconds. :type start_sec: float :param end_sec: End of subsegment in seconds. :type end_sec: float + :raise ValueError: If start_sec or end_sec is incorrectly set, e.g. out + of bounds in time. """ start_sec = 0.0 if start_sec is None else start_sec end_sec = self.duration if end_sec is None else end_sec @@ -379,19 +384,33 @@ class AudioSegment(object): start_sec = self.duration + start_sec if end_sec < 0.0: end_sec = self.duration + end_sec + if start_sec < 0.0: + raise ValueError("The slice start position (%f s) is out of " + "bounds." % start_sec) + if end_sec < 0.0: + raise ValueError("The slice end position (%f s) is out of bounds." % + end_sec) + if start_sec > end_sec: + raise ValueError("The slice start position (%f s) is later than " + "the end position (%f s)." 
% (start_sec, end_sec)) + if end_sec > self.duration: + raise ValueError("The slice end position (%f s) is out of bounds " + "(> %f s)" % (end_sec, self.duration)) start_sample = int(round(start_sec * self._sample_rate)) end_sample = int(round(end_sec * self._sample_rate)) self._samples = self._samples[start_sample:end_sample] def random_subsegment(self, subsegment_length, rng=None): - """Return a random subsegment of a specified length in seconds. + """Cut the specified length of the audiosegment randomly. + + Note that this is an in-place transformation. :param subsegment_length: Subsegment length in seconds. :type subsegment_length: float :param rng: Random number generator state. :type rng: random.Random - :raises ValueError: If the length of subsegment greater than - origineal segemnt. + :raises ValueError: If the length of subsegment is greater than + the origineal segemnt. """ rng = random.Random() if rng is None else rng if subsegment_length > self.duration: @@ -401,7 +420,7 @@ class AudioSegment(object): self.subsegment(start_time, start_time + subsegment_length) def convolve(self, impulse_segment, allow_resample=False): - """Convolve this audio segment with the given impulse_segment. + """Convolve this audio segment with the given impulse segment. Note that this is an in-place transformation. @@ -428,6 +447,8 @@ class AudioSegment(object): """Convolve and normalize the resulting audio segment so that it has the same average power as the input signal. + Note that this is an in-place transformation. + :param impulse_segment: Impulse response segments. :type impulse_segment: AudioSegment :param allow_resample: Indicates whether resampling is allowed when @@ -445,10 +466,12 @@ class AudioSegment(object): allow_downsampling=False, max_gain_db=300.0, rng=None): - """Adds the given noise segment at a specific signal-to-noise ratio. + """Add the given noise segment at a specific signal-to-noise ratio. If the noise segment is longer than this segment, a random subsegment of matching length is sampled from it and used instead. + Note that this is an in-place transformation. + :param noise: Noise signal to add. :type noise: AudioSegment :param snr_dB: Signal-to-Noise Ratio, in decibels. @@ -480,9 +503,10 @@ class AudioSegment(object): " base signal (%f sec)." % (noise.duration, self.duration)) noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db) - noise.random_subsegment(self.duration, rng=rng) - noise.apply_gain(noise_gain_db) - self.superimposed(noise) + noise_new = copy.deepcopy(noise) + noise_new.random_subsegment(self.duration, rng=rng) + noise_new.apply_gain(noise_gain_db) + self.superimpose(noise_new) @property def samples(self): diff --git a/data_utils/speech.py b/data_utils/speech.py index 00190009a..fc031ff46 100755 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -67,7 +67,8 @@ class SpeechSegment(AudioSegment): @classmethod def concatenate(cls, *segments): - """Concatenate an arbitrary number of speech segments together. + """Concatenate an arbitrary number of speech segments together, both + audio and transcript will be concatenated. :param *segments: Input speech segments to be concatenated. :type *segments: tuple of SpeechSegment @@ -75,8 +76,7 @@ class SpeechSegment(AudioSegment): :rtype: SpeechSegment :raises ValueError: If the number of segments is zero, or if the sample_rate of any two segments does not match. - :raises TypeError: If every item in segments is not SpeechSegment - instance. 
+ :raises TypeError: If any segment is not SpeechSegment instance. """ if len(segments) == 0: raise ValueError("No speech segments are given to concatenate.") @@ -94,7 +94,7 @@ class SpeechSegment(AudioSegment): return cls(samples, sample_rate, transcripts) @classmethod - def slice_from_file(cls, filepath, start=None, end=None, transcript=""): + def slice_from_file(cls, filepath, start=None, end=None, transcript): """Loads a small section of an speech without having to load the entire file into the memory which can be incredibly wasteful. @@ -121,7 +121,7 @@ class SpeechSegment(AudioSegment): @classmethod def make_silence(cls, duration, sample_rate): """Creates a silent speech segment of the given duration and - sample rate. + sample rate, transcript will be an empty string. :param duration: Length of silence in seconds. :type duration: float From def66a32235f8e2942ddaf9c60ebed5cb52b6bf9 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 19 Jun 2017 11:31:34 +0800 Subject: [PATCH 052/335] Follow comments. --- error_rate.py | 18 ++++++++++++------ tests/test_error_rate.py | 8 ++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/error_rate.py b/error_rate.py index 08fe12558..0cf17921c 100644 --- a/error_rate.py +++ b/error_rate.py @@ -2,14 +2,20 @@ """This module provides functions to calculate error rate in different level. e.g. wer for word-level, cer for char-level. """ - from __future__ import absolute_import from __future__ import division from __future__ import print_function + import numpy as np -def levenshtein_distance(ref, hyp): +def _levenshtein_distance(ref, hyp): + """Levenshtein distance is a string metric for measuring the difference between + two sequences. Informally, the levenshtein disctance is defined as the minimum + number of single-character edits (substitutions, insertions or deletions) + required to change one word into the other. We can naturally extend the edits to + word level when calculate levenshtein disctance for two sentences. + """ ref_len = len(ref) hyp_len = len(hyp) @@ -72,7 +78,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): :type delimiter: char :return: Word error rate. :rtype: float - :raises ValueError: If reference length is zero. + :raises ValueError: If the reference length is zero. """ if ignore_case == True: reference = reference.lower() @@ -84,7 +90,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): if len(ref_words) == 0: raise ValueError("Reference's word number should be greater than 0.") - edit_distance = levenshtein_distance(ref_words, hyp_words) + edit_distance = _levenshtein_distance(ref_words, hyp_words) wer = float(edit_distance) / len(ref_words) return wer @@ -118,7 +124,7 @@ def cer(reference, hypothesis, ignore_case=False): :type ignore_case: bool :return: Character error rate. :rtype: float - :raises ValueError: If reference length is zero. + :raises ValueError: If the reference length is zero. 
""" if ignore_case == True: reference = reference.lower() @@ -130,6 +136,6 @@ def cer(reference, hypothesis, ignore_case=False): if len(reference) == 0: raise ValueError("Length of reference should be greater than 0.") - edit_distance = levenshtein_distance(reference, hypothesis) + edit_distance = _levenshtein_distance(reference, hypothesis) cer = float(edit_distance) / len(reference) return cer diff --git a/tests/test_error_rate.py b/tests/test_error_rate.py index 57a6ccd68..be7313f35 100644 --- a/tests/test_error_rate.py +++ b/tests/test_error_rate.py @@ -23,10 +23,8 @@ class TestParse(unittest.TestCase): def test_wer_3(self): ref = ' ' hyp = 'Hypothesis sentence' - try: + with self.assertRaises(ValueError): word_error_rate = error_rate.wer(ref, hyp) - except Exception as e: - self.assertTrue(isinstance(e, ValueError)) def test_cer_1(self): ref = 'werewolf' @@ -53,10 +51,8 @@ class TestParse(unittest.TestCase): def test_cer_5(self): ref = '' hyp = 'Hypothesis' - try: + with self.assertRaises(ValueError): char_error_rate = error_rate.cer(ref, hyp) - except Exception as e: - self.assertTrue(isinstance(e, ValueError)) if __name__ == '__main__': From d35b747175e36b44c369ef9ceb2b9fd7f9cbd9ec Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 19 Jun 2017 23:24:58 +0800 Subject: [PATCH 053/335] Fix ci following: 1. Unify the dependency installation process in setup.sh. 2. Change the version of package scipy from 0.13.0b1 to 0.13.1 --- requirements.txt | 3 +-- setup.sh | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 setup.sh diff --git a/requirements.txt b/requirements.txt index c37e88ffe..0183ecf01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ SoundFile==0.9.0.post1 wget==3.2 -scikits.samplerate==0.3.3 -scipy==0.13.0b1 +scipy==0.13.1 diff --git a/setup.sh b/setup.sh new file mode 100644 index 000000000..c59ef82ff --- /dev/null +++ b/setup.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# install python dependencies +if [ -f 'requirements.txt' ]; then + pip install -r requirements.txt +fi + +if [ $? != 0 ]; then + exit 1 +fi + +# install scikits.samplerate +curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz" +if [ $? != 0 ]; then + echo "Download libsamplerate-0.1.9.tar.gz failed !!!" + exit 1 +fi +tar -xvf libsamplerate-0.1.9.tar.gz +cd libsamplerate-0.1.9 +./configure && make && make install +cd - +rm -rf libsamplerate-0.1.9 +rm libsamplerate-0.1.9.tar.gz +pip install scikits.samplerate==0.3.3 +if [ $? != 0 ]; then + echo "Install sckikits.samplerate failed !!!" + exit 1 +fi From a5dcd23bf2c44ac261882c89649e7c296ef936b7 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 19 Jun 2017 23:46:44 +0800 Subject: [PATCH 054/335] Follow comments. --- README.md | 2 +- setup.sh | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 23e0b412b..0cdb203d2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. ``` -pip install -r requirements.txt +sh setup.sh export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib:$LD_LIBRARY_PATH ``` diff --git a/setup.sh b/setup.sh index c59ef82ff..1ae2a5eee 100644 --- a/setup.sh +++ b/setup.sh @@ -4,8 +4,8 @@ if [ -f 'requirements.txt' ]; then pip install -r requirements.txt fi - if [ $? != 0 ]; then + echo "Install python dependencies failed !!!" 
exit 1 fi @@ -23,6 +23,8 @@ rm -rf libsamplerate-0.1.9 rm libsamplerate-0.1.9.tar.gz pip install scikits.samplerate==0.3.3 if [ $? != 0 ]; then - echo "Install sckikits.samplerate failed !!!" + echo "Install scikits.samplerate failed !!!" exit 1 fi + +echo "Install all dependencies successfully." From 0729abe02e787762acc0f0b30e4890b554f20d06 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 20 Jun 2017 12:14:24 +0800 Subject: [PATCH 055/335] tiny adjust --- decoder.py | 6 ++---- infer.py | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/decoder.py b/decoder.py index 445831aab..a23fa1329 100644 --- a/decoder.py +++ b/decoder.py @@ -1,4 +1,4 @@ -"""Contains various CTC decoder.""" +"""Contains various CTC decoders.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -103,7 +103,7 @@ def ctc_beam_search_decoder(probs_seq, beam_size to find many paths to one label, return beam_size labels in the descending order of probabilities. The implementation is based on Prefix Beam Search(https://arxiv.org/abs/1408.2873), and the unclear part is - redesigned, need to be verified. + redesigned. :param probs_seq: 2-D list with length num_time_steps, each element is a list of normalized probabilities over vocabulary @@ -262,9 +262,7 @@ def ctc_beam_search_decoder_nproc(probs_split, :type num_processes: int :return: Decoding log probabilities and result sentences in descending order. :rtype: list - ''' - if num_processes is None: num_processes = multiprocessing.cpu_count() if not num_processes > 0: diff --git a/infer.py b/infer.py index 9f6d91ca5..4545f3da0 100644 --- a/infer.py +++ b/infer.py @@ -151,7 +151,6 @@ def infer(): ## decode and print # best path decode wer_sum, wer_counter = 0, 0 - total_time = 0.0 if args.decode_method == "best_path": for i, probs in enumerate(probs_split): target_transcription = ''.join([ From 115a06bb3739715d75cdadc3b6bc813acd328c99 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 16:24:03 +0800 Subject: [PATCH 056/335] add augmentor class --- data_utils/audio.py | 2 +- data_utils/augmentor/augmentation.py | 9 ++++ .../online_bayesian_normalization.py | 50 +++++++++++++++++++ data_utils/augmentor/resample.py | 30 +++++++++++ data_utils/augmentor/speed_perturb.py | 43 ++++++++++++++++ data_utils/augmentor/volume_perturb.py | 2 +- 6 files changed, 134 insertions(+), 2 deletions(-) mode change 100644 => 100755 data_utils/audio.py mode change 100644 => 100755 data_utils/augmentor/augmentation.py create mode 100755 data_utils/augmentor/online_bayesian_normalization.py create mode 100755 data_utils/augmentor/resample.py create mode 100755 data_utils/augmentor/speed_perturb.py mode change 100644 => 100755 data_utils/augmentor/volume_perturb.py diff --git a/data_utils/audio.py b/data_utils/audio.py old mode 100644 new mode 100755 index 5d02feb60..03e2d5e40 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -308,7 +308,7 @@ class AudioSegment(object): prior_mean_squared = 10.**(prior_db / 10.) 
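# An illustrative, standalone sketch (not part of this patch) of the online
# Bayesian gain estimate that normalize_online_bayesian computes: a prior RMS
# level (prior_db, weighted by prior_samples) is blended with the running
# energy of the samples seen so far, so early samples lean on the prior and
# later samples on the data. The function and argument names below are made
# up for illustration; only a mono float `samples` array is assumed.
import numpy as np

def online_bayesian_gain_db(samples, target_db, prior_db, prior_samples):
    prior_mean_squared = 10. ** (prior_db / 10.)
    prior_sum_of_squares = prior_mean_squared * prior_samples
    cumsum_of_squares = np.cumsum(samples ** 2)
    # Per-sample counts 1..N; this needs the *number* of samples, which is
    # what the one-line fix in this hunk restores.
    sample_count = np.arange(len(samples)) + 1
    mean_squared = (cumsum_of_squares + prior_sum_of_squares) / \
                   (sample_count + prior_samples)
    rms_estimate_db = 10. * np.log10(mean_squared + 1e-20)
    # Gain (in dB) that would bring the running RMS estimate to target_db.
    return target_db - rms_estimate_db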
prior_sum_of_squares = prior_mean_squared * prior_samples cumsum_of_squares = np.cumsum(self.samples**2) - sample_count = np.arange(len(self.num_samples)) + 1 + sample_count = np.arange(self.num_samples) + 1 if startup_sample_idx > 0: cumsum_of_squares[:startup_sample_idx] = \ cumsum_of_squares[startup_sample_idx] diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py old mode 100644 new mode 100755 index abe1a0ec8..bfe7075e0 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -6,6 +6,9 @@ from __future__ import print_function import json import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor +from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor +from data_utils.augmentor.resample import ResampleAugmentor +from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor class AugmentationPipeline(object): @@ -76,5 +79,11 @@ class AugmentationPipeline(object): """Return an augmentation model by the type name, and pass in params.""" if augmentor_type == "volume": return VolumePerturbAugmentor(self._rng, **params) + if augmentor_type == "speed": + return SpeedPerturbAugmentor(self._rng, **params) + if augmentor_type == "resample": + return ResampleAugmentor(self._rng, **params) + if augmentor_type == "baysian_normal": + return OnlineBayesianNormalizationAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py new file mode 100755 index 000000000..bb999912e --- /dev/null +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -0,0 +1,50 @@ +"""Contain the online bayesian normalization augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class OnlineBayesianNormalizationAugmentor(AugmentorBase): + """Augmentation model for adding online bayesian normalization. + + :param rng: Random generator object. + :type rng: random.Random + :param target_db: Target RMS value in decibels. + :type target_db: float + :param prior_db: Prior RMS estimate in decibels. + :type prior_db: float + :param prior_samples: Prior strength in number of samples. + :type prior_samples: int + :param startup_delay: Default 0.0s. If provided, this function will + accrue statistics for the first startup_delay + seconds before applying online normalization. + :type starup_delay: float. + """ + + def __init__(self, + rng, + target_db, + prior_db, + prior_samples, + startup_delay=0.0): + self._target_db = target_db + self._prior_db = prior_db + self._prior_samples = prior_samples + self._startup_delay = startup_delay + self._rng = rng + self._startup_delay=startup_delay + + def transform_audio(self, audio_segment): + """Normalizes the input audio using the online Bayesian approach. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. 
+ :type audio_segment: AudioSegment|SpeechSegment + """ + audio_segment.normalize_online_bayesian(self._target_db, + self._prior_db, + self._prior_samples, + self._startup_delay) diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py new file mode 100755 index 000000000..88ef7ed06 --- /dev/null +++ b/data_utils/augmentor/resample.py @@ -0,0 +1,30 @@ +"""Contain the resample augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class ResampleAugmentor(AugmentorBase): + """Augmentation model for resampling. + + :param rng: Random generator object. + :type rng: random.Random + :param new_sample_rate: New sample rate in Hz + :type new_sample_rate: int + """ + + def __init__(self, rng, new_sample_rate): + self._new_sample_rate = new_sample_rate + self._rng = rng + + def transform_audio(self, audio_segment): + """Resamples the input audio to a target sample rate. + + Note that this is an in-place transformation. + + :param audio: Audio segment to add effects to. + :type audio: AudioSegment|SpeechSegment + """ + audio_segment.resample(self._new_sample_rate) \ No newline at end of file diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py new file mode 100755 index 000000000..67de344ce --- /dev/null +++ b/data_utils/augmentor/speed_perturb.py @@ -0,0 +1,43 @@ +"""Contain the speech perturbation augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class SpeedPerturbAugmentor(AugmentorBase): + """Augmentation model for adding speed perturbation. + + See reference paper here: + http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf + + :param rng: Random generator object. + :type rng: random.Random + :param min_speed_rate: Lower bound of new speed rate to sample. + :type min_speed_rate: float + :param max_speed_rate: Upper bound of new speed rate to sample. + :type max_speed_rate: float + """ + + def __init__(self, rng, min_speed_rate, max_speed_rate): + + if (min_speed_rate < 0.5): + raise ValueError("Sampling speed below 0.9 can cause unnatural effects") + if (max_speed_rate > 1.5): + raise ValueError("Sampling speed above 1.1 can cause unnatural effects") + self._min_speed_rate = min_speed_rate + self._max_speed_rate = max_speed_rate + self._rng = rng + + def transform_audio(self, audio_segment): + """Sample a new speed rate from the given range and + changes the speed of the given audio clip. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. + :type audio_segment: AudioSegment|SpeechSegment + """ + sampled_speed = self._rng.uniform(self._min_speed_rate, self._max_speed_rate) + audio_segment.change_speed(sampled_speed) diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py old mode 100644 new mode 100755 index a5a9f6cad..62631fb04 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -36,5 +36,5 @@ class VolumePerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. 
:type audio_segment: AudioSegmenet|SpeechSegment """ - gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS) + gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS) audio_segment.apply_gain(gain) From 71283d619da6fe0b11d26fde2c701118b55fc25a Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 16:33:28 +0800 Subject: [PATCH 057/335] add augmentor class --- data_utils/augmentor/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index 88ef7ed06..6634bbd53 100755 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -11,7 +11,7 @@ class ResampleAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random - :param new_sample_rate: New sample rate in Hz + :param new_sample_rate: New sample rate in Hz. :type new_sample_rate: int """ @@ -27,4 +27,4 @@ class ResampleAugmentor(AugmentorBase): :param audio: Audio segment to add effects to. :type audio: AudioSegment|SpeechSegment """ - audio_segment.resample(self._new_sample_rate) \ No newline at end of file + audio_segment.resample(self._new_sample_rate) From 1d8cc4a5a9bfd9eff50a9a971411333e9050ff83 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 20 Jun 2017 17:06:53 +0800 Subject: [PATCH 058/335] Add multi-threading support for DS2 data generator. --- data_utils/data.py | 14 +++++++++++--- data_utils/speech.py | 2 +- infer.py | 8 +++++++- train.py | 22 +++++++++++++++++++++- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 424343a48..8391dacc1 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -44,6 +44,8 @@ class DataGenerator(object): :types max_freq: None|float :param specgram_type: Specgram feature type. Options: 'linear'. :type specgram_type: str + :param num_threads: Number of CPU threads for processing data. + :type num_threads: int :param random_seed: Random seed. :type random_seed: int """ @@ -58,6 +60,7 @@ class DataGenerator(object): window_ms=20.0, max_freq=None, specgram_type='linear', + num_threads=12, random_seed=0): self._max_duration = max_duration self._min_duration = min_duration @@ -70,6 +73,7 @@ class DataGenerator(object): stride_ms=stride_ms, window_ms=window_ms, max_freq=max_freq) + self._num_threads = num_threads self._rng = random.Random(random_seed) self._epoch = 0 @@ -207,10 +211,14 @@ class DataGenerator(object): def reader(): for instance in manifest: - yield self._process_utterance(instance["audio_filepath"], - instance["text"]) + yield instance - return reader + def mapper(instance): + return self._process_utterance(instance["audio_filepath"], + instance["text"]) + + return paddle.reader.xmap_readers( + mapper, reader, self._num_threads, 1024, order=True) def _padding_batch(self, batch, padding_to=-1, flatten=False): """ diff --git a/data_utils/speech.py b/data_utils/speech.py index fc031ff46..568e4443b 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -94,7 +94,7 @@ class SpeechSegment(AudioSegment): return cls(samples, sample_rate, transcripts) @classmethod - def slice_from_file(cls, filepath, start=None, end=None, transcript): + def slice_from_file(cls, filepath, transcript, start=None, end=None): """Loads a small section of an speech without having to load the entire file into the memory which can be incredibly wasteful. 
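The multi-threaded preprocessing added in this patch hands each manifest entry to a pool of worker threads via paddle.reader.xmap_readers, so audio loading and featurization overlap with training. A rough, simplified stand-in for that decorator is sketched below; the names threaded_xmap, mapper and reader are illustrative only and are not part of the codebase.

from multiprocessing.pool import ThreadPool

def threaded_xmap(mapper, reader, num_threads):
    """Wrap a no-argument generator function `reader` so that `mapper` is
    applied to each of its items by `num_threads` worker threads, yielding
    the mapped results in the original order."""
    def new_reader():
        pool = ThreadPool(processes=num_threads)
        try:
            # imap keeps the output order while workers run concurrently.
            for result in pool.imap(mapper, reader()):
                yield result
        finally:
            pool.close()
            pool.join()
    return new_reader

Used as, e.g., threaded_xmap(process_utterance, manifest_reader, 8)(), this lets per-utterance spectrogram extraction proceed in the background while batches are consumed, which is the effect the data generator obtains from xmap_readers with its buffer of 1024 items.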
diff --git a/infer.py b/infer.py index 06449ab05..7fc848296 100644 --- a/infer.py +++ b/infer.py @@ -38,6 +38,11 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--num_threads_data", + default=12, + type=int, + help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -67,7 +72,8 @@ def infer(): data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, - augmentation_config='{}') + augmentation_config='{}', + num_threads=args.num_threads_data) # create network config # paddle.data_type.dense_array is used for variable batch input. diff --git a/train.py b/train.py index c60a039b6..2c3b8ce78 100644 --- a/train.py +++ b/train.py @@ -52,6 +52,18 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use sortagrad or not. (default: %(default)s)") +parser.add_argument( + "--max_duration", + default=100.0, + type=float, + help="Audios with duration larger than this will be discarded. " + "(default: %(default)s)") +parser.add_argument( + "--min_duration", + default=0.0, + type=float, + help="Audios with duration smaller than this will be discarded. " + "(default: %(default)s)") parser.add_argument( "--shuffle_method", default='instance_shuffle', @@ -63,6 +75,11 @@ parser.add_argument( default=4, type=int, help="Trainer number. (default: %(default)s)") +parser.add_argument( + "--num_threads_data", + default=12, + type=int, + help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -107,7 +124,10 @@ def train(): return DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, - augmentation_config=args.augmentation_config) + augmentation_config=args.augmentation_config, + max_duration=args.max_duration, + min_duration=args.min_duration, + num_threads=args.num_threads_data) train_generator = data_generator() test_generator = data_generator() From d104eccf6784585aa54d931b95db9364cac7744e Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 20 Jun 2017 18:13:46 +0800 Subject: [PATCH 059/335] Update the default num_threads for DS2 data generator. --- data_utils/data.py | 3 ++- infer.py | 3 ++- train.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 8391dacc1..44af7ffaa 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -7,6 +7,7 @@ from __future__ import print_function import random import numpy as np +import multiprocessing import paddle.v2 as paddle from data_utils import utils from data_utils.augmentor.augmentation import AugmentationPipeline @@ -60,7 +61,7 @@ class DataGenerator(object): window_ms=20.0, max_freq=None, specgram_type='linear', - num_threads=12, + num_threads=multiprocessing.cpu_count(), random_seed=0): self._max_duration = max_duration self._min_duration = min_duration diff --git a/infer.py b/infer.py index 7fc848296..71518133a 100644 --- a/infer.py +++ b/infer.py @@ -6,6 +6,7 @@ from __future__ import print_function import argparse import gzip import distutils.util +import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 @@ -40,7 +41,7 @@ parser.add_argument( help="Use gpu or not. 
(default: %(default)s)") parser.add_argument( "--num_threads_data", - default=12, + default=multiprocessing.cpu_count(), type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( diff --git a/train.py b/train.py index 2c3b8ce78..fc23ec726 100644 --- a/train.py +++ b/train.py @@ -9,6 +9,7 @@ import argparse import gzip import time import distutils.util +import multiprocessing import paddle.v2 as paddle from model import deep_speech2 from data_utils.data import DataGenerator @@ -77,7 +78,7 @@ parser.add_argument( help="Trainer number. (default: %(default)s)") parser.add_argument( "--num_threads_data", - default=12, + default=multiprocessing.cpu_count(), type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( From d64f470078056e1a0e3828ef30c6127596caa30c Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 18:19:43 +0800 Subject: [PATCH 060/335] add augmentor class --- data_utils/augmentor/augmentation.py | 2 +- tests/test_augmentor.py | 60 ++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100755 tests/test_augmentor.py diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index bfe7075e0..087880086 100755 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -83,7 +83,7 @@ class AugmentationPipeline(object): return SpeedPerturbAugmentor(self._rng, **params) if augmentor_type == "resample": return ResampleAugmentor(self._rng, **params) - if augmentor_type == "baysian_normal": + if augmentor_type == "bayesian_normal": return OnlineBayesianNormalizationAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." 
% augmentor_type) diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py new file mode 100755 index 000000000..76fd321a2 --- /dev/null +++ b/tests/test_augmentor.py @@ -0,0 +1,60 @@ +"""Test augmentor class.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +from data_utils import audio +from data_utils.augmentor.augmentation import AugmentationPipeline +import random +import numpy as np + +random_seed=0 +#audio instance +audio_data=[3.05175781e-05, -8.54492188e-04, -1.09863281e-03, -9.46044922e-04,\ + -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.10571289e-03,\ + -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.46044922e-04,\ + -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.31933594e-03,\ + -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.38037109e-03] +audio_data = np.array(audio_data) +samplerate = 10 + +class TestAugmentor(unittest.TestCase): + def test_volume(self): + augmentation_config='[{"type": "volume","params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + original_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + + def test_speed(self): + augmentation_config='[{"type": "speed","params": {"min_speed_rate": 1.2,"max_speed_rate": 1.4},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + original_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + + def test_resample(self): + augmentation_config='[{"type": "resample","params": {"new_sample_rate":5},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + self.assertTrue(audio_segment.sample_rate == 5) + + def test_bayesial(self): + augmentation_config='[{"type": "bayesian_normal","params": {"target_db": -20, "prior_db": -4, "prior_samples": -8, "startup_delay": 0.0},"prob": 1.0}]' + augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, + random_seed=random_seed) + audio_segment = audio.AudioSegment(audio_data, samplerate) + augmentation_pipeline.transform_audio(audio_segment) + original_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + +if __name__ == '__main__': + unittest.main() + From df77c6d5dbb35a2ebd332aa9ad7044bddb52fe5e Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 18:39:48 +0800 Subject: [PATCH 061/335] Add 3 augmentor classes and related unittests --- tests/test_augmentor.py | 68 ++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py index 76fd321a2..17491704d 100755 --- a/tests/test_augmentor.py +++ b/tests/test_augmentor.py @@ -11,49 +11,53 @@ import numpy as np random_seed=0 #audio 
instance -audio_data=[3.05175781e-05, -8.54492188e-04, -1.09863281e-03, -9.46044922e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.10571289e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.46044922e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.31933594e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.38037109e-03] +audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ + -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ + -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ + -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ + -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] audio_data = np.array(audio_data) samplerate = 10 class TestAugmentor(unittest.TestCase): def test_volume(self): - augmentation_config='[{"type": "volume","params": {"min_gain_dBFS": -15, "max_gain_dBFS": 15},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - original_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ + '"max_gain_dBFS": 15},"prob": 1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + orig_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) def test_speed(self): - augmentation_config='[{"type": "speed","params": {"min_speed_rate": 1.2,"max_speed_rate": 1.4},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - original_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ + '"max_speed_rate": 1.4},"prob": 1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + orig_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) def test_resample(self): - augmentation_config='[{"type": "resample","params": {"new_sample_rate":5},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - self.assertTrue(audio_segment.sample_rate == 5) + config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ + '"prob": 1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + self.assertTrue(audio_seg.sample_rate == 5) def test_bayesial(self): - augmentation_config='[{"type": "bayesian_normal","params": {"target_db": -20, "prior_db": -4, 
"prior_samples": -8, "startup_delay": 0.0},"prob": 1.0}]' - augmentation_pipeline = AugmentationPipeline(augmentation_config=augmentation_config, - random_seed=random_seed) - audio_segment = audio.AudioSegment(audio_data, samplerate) - augmentation_pipeline.transform_audio(audio_segment) - original_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_segment.samples == original_audio.samples)) + config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ + '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' + aug_pipeline = AugmentationPipeline(augmentation_config=config_json, + random_seed=random_seed) + audio_seg = audio.AudioSegment(audio_data, samplerate) + aug_pipeline.transform_audio(audio_seg) + orig_audio = audio.AudioSegment(audio_data, samplerate) + self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) if __name__ == '__main__': unittest.main() From 5398360e5f5bcbc1d48945395204bd9b708a6768 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Tue, 20 Jun 2017 18:50:13 +0800 Subject: [PATCH 062/335] Add 3 augmentor classes and related unittests --- tests/test_augmentor.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py index 17491704d..57596e63c 100755 --- a/tests/test_augmentor.py +++ b/tests/test_augmentor.py @@ -9,8 +9,7 @@ from data_utils.augmentor.augmentation import AugmentationPipeline import random import numpy as np -random_seed=0 -#audio instance +random_seed = 0 audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ @@ -19,12 +18,13 @@ audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ audio_data = np.array(audio_data) samplerate = 10 + class TestAugmentor(unittest.TestCase): def test_volume(self): config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) orig_audio = audio.AudioSegment(audio_data, samplerate) @@ -33,8 +33,8 @@ class TestAugmentor(unittest.TestCase): def test_speed(self): config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ '"max_speed_rate": 1.4},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) orig_audio = audio.AudioSegment(audio_data, samplerate) @@ -43,8 +43,8 @@ class TestAugmentor(unittest.TestCase): def test_resample(self): config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) self.assertTrue(audio_seg.sample_rate == 5) @@ -52,13 +52,13 @@ class 
TestAugmentor(unittest.TestCase): def test_bayesial(self): config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline(augmentation_config=config_json, - random_seed=random_seed) + aug_pipeline = AugmentationPipeline( + augmentation_config=config_json, random_seed=random_seed) audio_seg = audio.AudioSegment(audio_data, samplerate) aug_pipeline.transform_audio(audio_seg) orig_audio = audio.AudioSegment(audio_data, samplerate) self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) + if __name__ == '__main__': unittest.main() - From 2450591a440dfc863cce53152416e594bdfff6b3 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Wed, 21 Jun 2017 11:47:15 +0800 Subject: [PATCH 063/335] add 3 augmentor class and change resample module --- data_utils/audio.py | 16 ++++---- data_utils/augmentor/resample.py | 5 ++- requirements.txt | 1 + tests/test_augmentor.py | 64 -------------------------------- 4 files changed, 12 insertions(+), 74 deletions(-) mode change 100644 => 100755 requirements.txt delete mode 100755 tests/test_augmentor.py diff --git a/data_utils/audio.py b/data_utils/audio.py index 03e2d5e40..f80425eac 100755 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -6,7 +6,7 @@ from __future__ import print_function import numpy as np import io import soundfile -import scikits.samplerate +import resampy from scipy import signal import random import copy @@ -321,21 +321,19 @@ class AudioSegment(object): gain_db = target_db - rms_estimate_db self.apply_gain(gain_db) - def resample(self, target_sample_rate, quality='sinc_medium'): + def resample(self, target_sample_rate, filter='kaiser_best'): """Resample the audio to a target sample rate. Note that this is an in-place transformation. :param target_sample_rate: Target sample rate. :type target_sample_rate: int - :param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}. - Sets resampling speed/quality tradeoff. - See http://www.mega-nerd.com/SRC/api_misc.html#Converters - :type quality: str + :param filter: The resampling filter to use one of {'kaiser_best', + 'kaiser_fast'}. + :type filter: str """ - resample_ratio = target_sample_rate / self._sample_rate - self._samples = scikits.samplerate.resample( - self._samples, r=resample_ratio, type=quality) + self._samples = resampy.resample( + self.samples, self.sample_rate, target_sample_rate, filter=filter) self._sample_rate = target_sample_rate def pad_silence(self, duration, sides='both'): diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index 6634bbd53..529b5fec1 100755 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -8,6 +8,9 @@ from data_utils.augmentor.base import AugmentorBase class ResampleAugmentor(AugmentorBase): """Augmentation model for resampling. + + See more info here: + https://ccrma.stanford.edu/~jos/resample/index.html :param rng: Random generator object. :type rng: random.Random @@ -27,4 +30,4 @@ class ResampleAugmentor(AugmentorBase): :param audio: Audio segment to add effects to. 
:type audio: AudioSegment|SpeechSegment """ - audio_segment.resample(self._new_sample_rate) + audio_segment.resample(self._new_sample_rate) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index 0183ecf01..d712787ff --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ SoundFile==0.9.0.post1 wget==3.2 scipy==0.13.1 +resampy==0.1.5 \ No newline at end of file diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py deleted file mode 100755 index 57596e63c..000000000 --- a/tests/test_augmentor.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Test augmentor class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import unittest -from data_utils import audio -from data_utils.augmentor.augmentation import AugmentationPipeline -import random -import numpy as np - -random_seed = 0 -audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] -audio_data = np.array(audio_data) -samplerate = 10 - - -class TestAugmentor(unittest.TestCase): - def test_volume(self): - config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ - '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_speed(self): - config_json = '[{"type":"speed","params": {"min_speed_rate": 1.2,' \ - '"max_speed_rate": 1.4},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_resample(self): - config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ - '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - self.assertTrue(audio_seg.sample_rate == 5) - - def test_bayesial(self): - config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ - '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - -if __name__ == '__main__': - unittest.main() From d6a852a304babcd916d35c58ec0470162891c583 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Wed, 21 Jun 2017 12:11:43 +0800 Subject: [PATCH 064/335] modify setup.sh to delete the install of libsamplerate --- .../augmentor/online_bayesian_normalization.py | 6 ++---- setup.sh | 18 ------------------ 2 files changed, 2 
insertions(+), 22 deletions(-) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py index bb999912e..e488ac7d6 100755 --- a/data_utils/augmentor/online_bayesian_normalization.py +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -32,9 +32,8 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase): self._target_db = target_db self._prior_db = prior_db self._prior_samples = prior_samples - self._startup_delay = startup_delay self._rng = rng - self._startup_delay=startup_delay + self._startup_delay = startup_delay def transform_audio(self, audio_segment): """Normalizes the input audio using the online Bayesian approach. @@ -44,7 +43,6 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegment|SpeechSegment """ - audio_segment.normalize_online_bayesian(self._target_db, - self._prior_db, + audio_segment.normalize_online_bayesian(self._target_db, self._prior_db, self._prior_samples, self._startup_delay) diff --git a/setup.sh b/setup.sh index 1ae2a5eee..e0ce1c4e6 100644 --- a/setup.sh +++ b/setup.sh @@ -9,22 +9,4 @@ if [ $? != 0 ]; then exit 1 fi -# install scikits.samplerate -curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz" -if [ $? != 0 ]; then - echo "Download libsamplerate-0.1.9.tar.gz failed !!!" - exit 1 -fi -tar -xvf libsamplerate-0.1.9.tar.gz -cd libsamplerate-0.1.9 -./configure && make && make install -cd - -rm -rf libsamplerate-0.1.9 -rm libsamplerate-0.1.9.tar.gz -pip install scikits.samplerate==0.3.3 -if [ $? != 0 ]; then - echo "Install scikits.samplerate failed !!!" - exit 1 -fi - echo "Install all dependencies successfully." From b340d4ed2fbdc487b555e3395d3093410e014a98 Mon Sep 17 00:00:00 2001 From: chrisxu2016 <823254351@qq.com> Date: Wed, 21 Jun 2017 12:18:33 +0800 Subject: [PATCH 065/335] modify setup.sh to delete the install of libsamplerate --- data_utils/augmentor/speed_perturb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py index 67de344ce..3f880fbba 100755 --- a/data_utils/augmentor/speed_perturb.py +++ b/data_utils/augmentor/speed_perturb.py @@ -23,9 +23,11 @@ class SpeedPerturbAugmentor(AugmentorBase): def __init__(self, rng, min_speed_rate, max_speed_rate): if (min_speed_rate < 0.5): - raise ValueError("Sampling speed below 0.9 can cause unnatural effects") + raise ValueError("Sampling speed below 0.9 can cause unnatural "\ + "effects") if (max_speed_rate > 1.5): - raise ValueError("Sampling speed above 1.1 can cause unnatural effects") + raise ValueError("Sampling speed above 1.1 can cause unnatural "\ + "effects") self._min_speed_rate = min_speed_rate self._max_speed_rate = max_speed_rate self._rng = rng @@ -39,5 +41,6 @@ class SpeedPerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. 
:type audio_segment: AudioSegment|SpeechSegment """ - sampled_speed = self._rng.uniform(self._min_speed_rate, self._max_speed_rate) + sampled_speed = self._rng.uniform(self._min_speed_rate, + self._max_speed_rate) audio_segment.change_speed(sampled_speed) From 803384561501299d01464f847e5ef9d5a6b38685 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 21 Jun 2017 14:52:06 +0800 Subject: [PATCH 066/335] add unit test for decoders --- decoder.py | 55 -------------------------- evaluate.py | 3 +- infer.py | 5 ++- scorer.py | 62 +++++++++++++++++++++++++++++ tests/test_decoders.py | 90 ++++++++++++++++++++++++++++++++++++++++++ tune.py | 3 +- 6 files changed, 159 insertions(+), 59 deletions(-) create mode 100644 scorer.py create mode 100644 tests/test_decoders.py diff --git a/decoder.py b/decoder.py index a23fa1329..006593672 100644 --- a/decoder.py +++ b/decoder.py @@ -3,10 +3,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os from itertools import groupby import numpy as np -import kenlm import multiprocessing @@ -39,59 +37,6 @@ def ctc_best_path_decode(probs_seq, vocabulary): return ''.join([vocabulary[index] for index in index_list]) -class Scorer(object): - """External defined scorer to evaluate a sentence in beam search - decoding, consisting of language model and word count. - - :param alpha: Parameter associated with language model. - :type alpha: float - :param beta: Parameter associated with word count. - :type beta: float - :model_path: Path to load language model. - :type model_path: basestring - """ - - def __init__(self, alpha, beta, model_path): - self._alpha = alpha - self._beta = beta - if not os.path.isfile(model_path): - raise IOError("Invaid language model path: %s" % model_path) - self._language_model = kenlm.LanguageModel(model_path) - - # n-gram language model scoring - def language_model_score(self, sentence): - #log prob of last word - log_cond_prob = list( - self._language_model.full_scores(sentence, eos=False))[-1][0] - return np.power(10, log_cond_prob) - - # word insertion term - def word_count(self, sentence): - words = sentence.strip().split(' ') - return len(words) - - # execute evaluation - def __call__(self, sentence, log=False): - """Evaluation function, gathering all the scores. - - :param sentence: The input sentence for evalutation - :type sentence: basestring - :param log: Whether return the score in log representation. - :type log: bool - :return: Evaluation score, in the decimal or log. - :rtype: float - """ - lm = self.language_model_score(sentence) - word_cnt = self.word_count(sentence) - if log == False: - score = np.power(lm, self._alpha) \ - * np.power(word_cnt, self._beta) - else: - score = self._alpha * np.log(lm) \ - + self._beta * np.log(word_cnt) - return score - - def ctc_beam_search_decoder(probs_seq, beam_size, vocabulary, diff --git a/evaluate.py b/evaluate.py index dee85cbd2..a7b8e2216 100644 --- a/evaluate.py +++ b/evaluate.py @@ -10,6 +10,7 @@ import gzip from data_utils.data import DataGenerator from model import deep_speech2 from decoder import * +from scorer import Scorer from error_rate import wer parser = argparse.ArgumentParser(description=__doc__) @@ -51,7 +52,7 @@ parser.add_argument( "beam_search or beam_search_nproc. (default: %(default)s)") parser.add_argument( "--language_model_path", - default="data/1Billion.klm", + default="data/en.00.UNKNOWN.klm", type=str, help="Path for language model. 
(default: %(default)s)") parser.add_argument( diff --git a/infer.py b/infer.py index b4de2b60c..ca18569db 100644 --- a/infer.py +++ b/infer.py @@ -11,6 +11,7 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from decoder import * +from scorer import Scorer from error_rate import wer import utils @@ -67,7 +68,7 @@ parser.add_argument( help="Vocabulary filepath. (default: %(default)s)") parser.add_argument( "--decode_method", - default='best_path', + default='beam_search_nproc', type=str, help="Method for ctc decoding:" " best_path," @@ -85,7 +86,7 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="data/1Billion.klm", + default="data/en.00.UNKNOWN.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( diff --git a/scorer.py b/scorer.py new file mode 100644 index 000000000..4f4684816 --- /dev/null +++ b/scorer.py @@ -0,0 +1,62 @@ +"""External Scorer for Beam Search Decoder.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import kenlm +import numpy as np + + +class Scorer(object): + """External defined scorer to evaluate a sentence in beam search + decoding, consisting of language model and word count. + + :param alpha: Parameter associated with language model. + :type alpha: float + :param beta: Parameter associated with word count. + :type beta: float + :model_path: Path to load language model. + :type model_path: basestring + """ + + def __init__(self, alpha, beta, model_path): + self._alpha = alpha + self._beta = beta + if not os.path.isfile(model_path): + raise IOError("Invaid language model path: %s" % model_path) + self._language_model = kenlm.LanguageModel(model_path) + + # n-gram language model scoring + def language_model_score(self, sentence): + #log10 prob of last word + log_cond_prob = list( + self._language_model.full_scores(sentence, eos=False))[-1][0] + return np.power(10, log_cond_prob) + + # word insertion term + def word_count(self, sentence): + words = sentence.strip().split(' ') + return len(words) + + # execute evaluation + def __call__(self, sentence, log=False): + """Evaluation function, gathering all the different scores + and return the final one. + + :param sentence: The input sentence for evalutation + :type sentence: basestring + :param log: Whether return the score in log representation. + :type log: bool + :return: Evaluation score, in the decimal or log. 
+ :rtype: float + """ + lm = self.language_model_score(sentence) + word_cnt = self.word_count(sentence) + if log == False: + score = np.power(lm, self._alpha) \ + * np.power(word_cnt, self._beta) + else: + score = self._alpha * np.log(lm) \ + + self._beta * np.log(word_cnt) + return score diff --git a/tests/test_decoders.py b/tests/test_decoders.py new file mode 100644 index 000000000..7fa89c5f7 --- /dev/null +++ b/tests/test_decoders.py @@ -0,0 +1,90 @@ +"""Test decoders.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +from decoder import * + + +class TestDecoders(unittest.TestCase): + def setUp(self): + self.vocab_list = ["\'", ' ', 'a', 'b', 'c', 'd'] + self.beam_size = 20 + self.probs_seq1 = [[ + 0.06390443, 0.21124858, 0.27323887, 0.06870235, 0.0361254, + 0.18184413, 0.16493624 + ], [ + 0.03309247, 0.22866108, 0.24390638, 0.09699597, 0.31895462, + 0.0094893, 0.06890021 + ], [ + 0.218104, 0.19992557, 0.18245131, 0.08503348, 0.14903535, + 0.08424043, 0.08120984 + ], [ + 0.12094152, 0.19162472, 0.01473646, 0.28045061, 0.24246305, + 0.05206269, 0.09772094 + ], [ + 0.1333387, 0.00550838, 0.00301669, 0.21745861, 0.20803985, + 0.41317442, 0.01946335 + ], [ + 0.16468227, 0.1980699, 0.1906545, 0.18963251, 0.19860937, + 0.04377724, 0.01457421 + ]] + self.probs_seq2 = [[ + 0.08034842, 0.22671944, 0.05799633, 0.36814645, 0.11307441, + 0.04468023, 0.10903471 + ], [ + 0.09742457, 0.12959763, 0.09435383, 0.21889204, 0.15113123, + 0.10219457, 0.20640612 + ], [ + 0.45033529, 0.09091417, 0.15333208, 0.07939558, 0.08649316, + 0.12298585, 0.01654384 + ], [ + 0.02512238, 0.22079203, 0.19664364, 0.11906379, 0.07816055, + 0.22538587, 0.13483174 + ], [ + 0.17928453, 0.06065261, 0.41153005, 0.1172041, 0.11880313, + 0.07113197, 0.04139363 + ], [ + 0.15882358, 0.1235788, 0.23376776, 0.20510435, 0.00279306, + 0.05294827, 0.22298418 + ]] + self.best_path_result = ["ac'bdc", "b'da"] + self.beam_search_result = ['acdc', "b'a"] + + def test_best_path_decoder_1(self): + bst_result = ctc_best_path_decode(self.probs_seq1, self.vocab_list) + self.assertEqual(bst_result, self.best_path_result[0]) + + def test_best_path_decoder_2(self): + bst_result = ctc_best_path_decode(self.probs_seq2, self.vocab_list) + self.assertEqual(bst_result, self.best_path_result[1]) + + def test_beam_search_decoder_1(self): + beam_result = ctc_beam_search_decoder( + probs_seq=self.probs_seq1, + beam_size=self.beam_size, + vocabulary=self.vocab_list, + blank_id=len(self.vocab_list)) + self.assertEqual(beam_result[0][1], self.beam_search_result[0]) + + def test_beam_search_decoder_2(self): + beam_result = ctc_beam_search_decoder( + probs_seq=self.probs_seq2, + beam_size=self.beam_size, + vocabulary=self.vocab_list, + blank_id=len(self.vocab_list)) + self.assertEqual(beam_result[0][1], self.beam_search_result[1]) + + def test_beam_search_nproc_decoder(self): + beam_results = ctc_beam_search_decoder_nproc( + probs_split=[self.probs_seq1, self.probs_seq2], + beam_size=self.beam_size, + vocabulary=self.vocab_list, + blank_id=len(self.vocab_list)) + self.assertEqual(beam_results[0][0][1], self.beam_search_result[0]) + self.assertEqual(beam_results[1][0][1], self.beam_search_result[1]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tune.py b/tune.py index 7dae14908..020763494 100644 --- a/tune.py +++ b/tune.py @@ -10,6 +10,7 @@ import gzip from data_utils.data import DataGenerator from model import deep_speech2 from decoder import * +from scorer 
import Scorer from error_rate import wer parser = argparse.ArgumentParser(description=__doc__) @@ -81,7 +82,7 @@ parser.add_argument( help="Number of outputs per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="data/1Billion.klm", + default="data/en.00.UNKNOWN.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( From 13f708739ba956aa3c63b91e529827bc73d3e160 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 21 Jun 2017 20:52:30 +0800 Subject: [PATCH 067/335] Improve audio featurizer and add shift augmentor. 1. Improve audio featurizer. 2. Add shift augmentor. 3. Update default argument to be the current best seggestion. 4. Add checkpoints with pass id. --- README.md | 4 +- data_utils/audio.py | 157 ++++++++++++--------- data_utils/augmentor/augmentation.py | 3 + data_utils/augmentor/volume_perturb.py | 2 +- data_utils/data.py | 7 +- data_utils/featurizer/audio_featurizer.py | 42 +++++- data_utils/featurizer/speech_featurizer.py | 24 +++- infer.py | 2 +- setup.sh | 3 + train.py | 19 ++- 10 files changed, 180 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index 0cdb203d2..2912ff314 100644 --- a/README.md +++ b/README.md @@ -51,13 +51,13 @@ python compute_mean_std.py --help For GPU Training: ``` -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --trainer_count 4 +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py ``` For CPU Training: ``` -python train.py --trainer_count 8 --use_gpu False +python train.py --use_gpu False ``` More help for arguments: diff --git a/data_utils/audio.py b/data_utils/audio.py index 5d02feb60..1faeb48a3 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -66,6 +66,54 @@ class AudioSegment(object): samples, sample_rate = soundfile.read(file, dtype='float32') return cls(samples, sample_rate) + @classmethod + def slice_from_file(cls, file, start=None, end=None): + """Loads a small section of an audio without having to load + the entire file into the memory which can be incredibly wasteful. + + :param file: Input audio filepath or file object. + :type file: basestring|file + :param start: Start time in seconds. If start is negative, it wraps + around from the end. If not provided, this function + reads from the very beginning. + :type start: float + :param end: End time in seconds. If end is negative, it wraps around + from the end. If not provided, the default behvaior is + to read to the end of the file. + :type end: float + :return: AudioSegment instance of the specified slice of the input + audio file. + :rtype: AudioSegment + :raise ValueError: If start or end is incorrectly set, e.g. out of + bounds in time. + """ + sndfile = soundfile.SoundFile(file) + sample_rate = sndfile.samplerate + duration = float(len(sndfile)) / sample_rate + start = 0. if start is None else start + end = 0. if end is None else end + if start < 0.0: + start += duration + if end < 0.0: + end += duration + if start < 0.0: + raise ValueError("The slice start position (%f s) is out of " + "bounds." % start) + if end < 0.0: + raise ValueError("The slice end position (%f s) is out of bounds." % + end) + if start > end: + raise ValueError("The slice start position (%f s) is later than " + "the slice end position (%f s)." 
% (start, end)) + if end > duration: + raise ValueError("The slice end position (%f s) is out of bounds " + "(> %f s)" % (end, duration)) + start_frame = int(start * sample_rate) + end_frame = int(end * sample_rate) + sndfile.seek(start_frame) + data = sndfile.read(frames=end_frame - start_frame, dtype='float32') + return cls(data, sample_rate) + @classmethod def from_bytes(cls, bytes): """Create audio segment from a byte string containing audio samples. @@ -105,6 +153,20 @@ class AudioSegment(object): samples = np.concatenate([seg.samples for seg in segments]) return cls(samples, sample_rate) + @classmethod + def make_silence(cls, duration, sample_rate): + """Creates a silent audio segment of the given duration and sample rate. + + :param duration: Length of silence in seconds. + :type duration: float + :param sample_rate: Sample rate. + :type sample_rate: float + :return: Silent AudioSegment instance of the given duration. + :rtype: AudioSegment + """ + samples = np.zeros(int(duration * sample_rate)) + return cls(samples, sample_rate) + def to_wav_file(self, filepath, dtype='float32'): """Save audio segment to disk as wav file. @@ -130,68 +192,6 @@ class AudioSegment(object): format='WAV', subtype=subtype_map[dtype]) - @classmethod - def slice_from_file(cls, file, start=None, end=None): - """Loads a small section of an audio without having to load - the entire file into the memory which can be incredibly wasteful. - - :param file: Input audio filepath or file object. - :type file: basestring|file - :param start: Start time in seconds. If start is negative, it wraps - around from the end. If not provided, this function - reads from the very beginning. - :type start: float - :param end: End time in seconds. If end is negative, it wraps around - from the end. If not provided, the default behvaior is - to read to the end of the file. - :type end: float - :return: AudioSegment instance of the specified slice of the input - audio file. - :rtype: AudioSegment - :raise ValueError: If start or end is incorrectly set, e.g. out of - bounds in time. - """ - sndfile = soundfile.SoundFile(file) - sample_rate = sndfile.samplerate - duration = float(len(sndfile)) / sample_rate - start = 0. if start is None else start - end = 0. if end is None else end - if start < 0.0: - start += duration - if end < 0.0: - end += duration - if start < 0.0: - raise ValueError("The slice start position (%f s) is out of " - "bounds." % start) - if end < 0.0: - raise ValueError("The slice end position (%f s) is out of bounds." % - end) - if start > end: - raise ValueError("The slice start position (%f s) is later than " - "the slice end position (%f s)." % (start, end)) - if end > duration: - raise ValueError("The slice end position (%f s) is out of bounds " - "(> %f s)" % (end, duration)) - start_frame = int(start * sample_rate) - end_frame = int(end * sample_rate) - sndfile.seek(start_frame) - data = sndfile.read(frames=end_frame - start_frame, dtype='float32') - return cls(data, sample_rate) - - @classmethod - def make_silence(cls, duration, sample_rate): - """Creates a silent audio segment of the given duration and sample rate. - - :param duration: Length of silence in seconds. - :type duration: float - :param sample_rate: Sample rate. - :type sample_rate: float - :return: Silent AudioSegment instance of the given duration. 
- :rtype: AudioSegment - """ - samples = np.zeros(int(duration * sample_rate)) - return cls(samples, sample_rate) - def superimpose(self, other): """Add samples from another segment to those of this segment (sample-wise addition, not segment concatenation). @@ -225,7 +225,7 @@ class AudioSegment(object): samples = self._convert_samples_from_float32(self._samples, dtype) return samples.tostring() - def apply_gain(self, gain): + def gain_db(self, gain): """Apply gain in decibels to samples. Note that this is an in-place transformation. @@ -278,7 +278,7 @@ class AudioSegment(object): "Unable to normalize segment to %f dB because the " "the probable gain have exceeds max_gain_db (%f dB)" % (target_db, max_gain_db)) - self.apply_gain(min(max_gain_db, target_db - self.rms_db)) + self.gain_db(min(max_gain_db, target_db - self.rms_db)) def normalize_online_bayesian(self, target_db, @@ -319,7 +319,7 @@ class AudioSegment(object): rms_estimate_db = 10 * np.log10(mean_squared_estimate) # Compute required time-varying gain. gain_db = target_db - rms_estimate_db - self.apply_gain(gain_db) + self.gain_db(gain_db) def resample(self, target_sample_rate, quality='sinc_medium'): """Resample the audio to a target sample rate. @@ -366,6 +366,31 @@ class AudioSegment(object): raise ValueError("Unknown value for the sides %s" % sides) self._samples = padded._samples + def shift(self, shift_ms): + """Shift the audio in time. If `shift_ms` is positive, shift with time + advance; if negative, shift with time delay. Silence are padded to + keep the duration unchanged. + + Note that this is an in-place transformation. + + :param shift_ms: Shift time in millseconds. If positive, shift with + time advance; if negative; shift with time delay. + :type shift_ms: float + :raises ValueError: If shift_ms is longer than audio duration. + """ + if shift_ms / 1000.0 > self.duration: + raise ValueError("Absolute value of shift_ms should be smaller " + "than audio duration.") + shift_samples = int(shift_ms * self._sample_rate / 1000) + if shift_samples > 0: + # time advance + self._samples[:-shift_samples] = self._samples[shift_samples:] + self._samples[-shift_samples:] = 0 + elif shift_samples < 0: + # time delay + self._samples[-shift_samples:] = self._samples[:shift_samples] + self._samples[:-shift_samples] = 0 + def subsegment(self, start_sec=None, end_sec=None): """Cut the AudioSegment between given boundaries. @@ -505,7 +530,7 @@ class AudioSegment(object): noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db) noise_new = copy.deepcopy(noise) noise_new.random_subsegment(self.duration, rng=rng) - noise_new.apply_gain(noise_gain_db) + noise_new.gain_db(noise_gain_db) self.superimpose(noise_new) @property diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index abe1a0ec8..0d60bbdb9 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -6,6 +6,7 @@ from __future__ import print_function import json import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor +from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor class AugmentationPipeline(object): @@ -76,5 +77,7 @@ class AugmentationPipeline(object): """Return an augmentation model by the type name, and pass in params.""" if augmentor_type == "volume": return VolumePerturbAugmentor(self._rng, **params) + elif augmentor_type == "shift": + return ShiftPerturbAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." 
% augmentor_type) diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py index a5a9f6cad..62631fb04 100644 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -36,5 +36,5 @@ class VolumePerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - gain = self._rng.uniform(min_gain_dBFS, max_gain_dBFS) + gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS) audio_segment.apply_gain(gain) diff --git a/data_utils/data.py b/data_utils/data.py index 44af7ffaa..d01ca8cc7 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -45,6 +45,9 @@ class DataGenerator(object): :types max_freq: None|float :param specgram_type: Specgram feature type. Options: 'linear'. :type specgram_type: str + :param use_dB_normalization: Whether to normalize the audio to -20 dB + before extracting the features. + :type use_dB_normalization: bool :param num_threads: Number of CPU threads for processing data. :type num_threads: int :param random_seed: Random seed. @@ -61,6 +64,7 @@ class DataGenerator(object): window_ms=20.0, max_freq=None, specgram_type='linear', + use_dB_normalization=True, num_threads=multiprocessing.cpu_count(), random_seed=0): self._max_duration = max_duration @@ -73,7 +77,8 @@ class DataGenerator(object): specgram_type=specgram_type, stride_ms=stride_ms, window_ms=window_ms, - max_freq=max_freq) + max_freq=max_freq, + use_dB_normalization=use_dB_normalization) self._num_threads = num_threads self._rng = random.Random(random_seed) self._epoch = 0 diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 9f9d4e505..4b4d02c60 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -24,26 +24,64 @@ class AudioFeaturizer(object): corresponding to frequencies between [0, max_freq] are returned. :types max_freq: None|float + :param target_sample_rate: Audio are resampled (if upsampling or + downsampling is allowed) to this before + extracting spectrogram features. + :type target_sample_rate: float + :param use_dB_normalization: Whether to normalize the audio to a certain + decibels before extracting the features. + :type use_dB_normalization: bool + :param target_dB: Target audio decibels for normalization. + :type target_dB: float """ def __init__(self, specgram_type='linear', stride_ms=10.0, window_ms=20.0, - max_freq=None): + max_freq=None, + target_sample_rate=16000, + use_dB_normalization=True, + target_dB=-20): self._specgram_type = specgram_type self._stride_ms = stride_ms self._window_ms = window_ms self._max_freq = max_freq + self._target_sample_rate = target_sample_rate + self._use_dB_normalization = use_dB_normalization + self._target_dB = target_dB - def featurize(self, audio_segment): + def featurize(self, + audio_segment, + allow_downsampling=True, + allow_upsamplling=True): """Extract audio features from AudioSegment or SpeechSegment. :param audio_segment: Audio/speech segment to extract features from. :type audio_segment: AudioSegment|SpeechSegment + :param allow_downsampling: Whether to allow audio downsampling before + featurizing. + :type allow_downsampling: bool + :param allow_upsampling: Whether to allow audio upsampling before + featurizing. + :type allow_upsampling: bool :return: Spectrogram audio feature in 2darray. :rtype: ndarray + :raises ValueError: If audio sample rate is not supported. 
""" + # upsampling or downsampling + if ((audio_segment.sample_rate > self._target_sample_rate and + allow_downsampling) or + (audio_segment.sample_rate < self._target_sample_rate and + allow_upsampling)): + audio_segment.resample(self._target_sample_rate) + if audio_segment.sample_rate != self._target_sample_rate: + raise ValueError("Audio sample rate is not supported. " + "Turn allow_downsampling or allow up_sampling on.") + # decibel normalization + if self._use_dB_normalization: + audio_segment.normalize(target_db=self._target_dB) + # extract spectrogram return self._compute_specgram(audio_segment.samples, audio_segment.sample_rate) diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py index 770204559..26283892e 100644 --- a/data_utils/featurizer/speech_featurizer.py +++ b/data_utils/featurizer/speech_featurizer.py @@ -29,6 +29,15 @@ class SpeechFeaturizer(object): corresponding to frequencies between [0, max_freq] are returned. :types max_freq: None|float + :param target_sample_rate: Speech are resampled (if upsampling or + downsampling is allowed) to this before + extracting spectrogram features. + :type target_sample_rate: float + :param use_dB_normalization: Whether to normalize the audio to a certain + decibels before extracting the features. + :type use_dB_normalization: bool + :param target_dB: Target audio decibels for normalization. + :type target_dB: float """ def __init__(self, @@ -36,9 +45,18 @@ class SpeechFeaturizer(object): specgram_type='linear', stride_ms=10.0, window_ms=20.0, - max_freq=None): - self._audio_featurizer = AudioFeaturizer(specgram_type, stride_ms, - window_ms, max_freq) + max_freq=None, + target_sample_rate=16000, + use_dB_normalization=True, + target_dB=-20): + self._audio_featurizer = AudioFeaturizer( + specgram_type=specgram_type, + stride_ms=stride_ms, + window_ms=window_ms, + max_freq=max_freq, + target_sample_rate=target_sample_rate, + use_dB_normalization=use_dB_normalization, + target_dB=target_dB) self._text_featurizer = TextFeaturizer(vocab_filepath) def featurize(self, speech_segment): diff --git a/infer.py b/infer.py index 71518133a..9037a108e 100644 --- a/infer.py +++ b/infer.py @@ -56,7 +56,7 @@ parser.add_argument( help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( "--model_filepath", - default='./params.tar.gz', + default='checkpoints/params.latest.tar.gz', type=str, help="Model filepath. (default: %(default)s)") parser.add_argument( diff --git a/setup.sh b/setup.sh index 1ae2a5eee..cdec34ff0 100644 --- a/setup.sh +++ b/setup.sh @@ -27,4 +27,7 @@ if [ $? != 0 ]; then exit 1 fi +# prepare ./checkpoints +mkdir checkpoints + echo "Install all dependencies successfully." diff --git a/train.py b/train.py index fc23ec726..3a2d0cad9 100644 --- a/train.py +++ b/train.py @@ -17,10 +17,10 @@ import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--batch_size", default=32, type=int, help="Minibatch size.") + "--batch_size", default=256, type=int, help="Minibatch size.") parser.add_argument( "--num_passes", - default=20, + default=200, type=int, help="Training pass number. (default: %(default)s)") parser.add_argument( @@ -55,7 +55,7 @@ parser.add_argument( help="Use sortagrad or not. (default: %(default)s)") parser.add_argument( "--max_duration", - default=100.0, + default=27.0, type=float, help="Audios with duration larger than this will be discarded. 
" "(default: %(default)s)") @@ -67,13 +67,13 @@ parser.add_argument( "(default: %(default)s)") parser.add_argument( "--shuffle_method", - default='instance_shuffle', + default='batch_shuffle_clipped', type=str, help="Shuffle method: 'instance_shuffle', 'batch_shuffle', " "'batch_shuffle_batch'. (default: %(default)s)") parser.add_argument( "--trainer_count", - default=4, + default=8, type=int, help="Trainer number. (default: %(default)s)") parser.add_argument( @@ -110,7 +110,9 @@ parser.add_argument( "the existing model of this path. (default: %(default)s)") parser.add_argument( "--augmentation_config", - default='{}', + default='[{"type": "shift", ' + '"params": {"min_shift_ms": -5, "max_shift_ms": 5},' + '"prob": 1.0}]', type=str, help="Augmentation configuration in json-format. " "(default: %(default)s)") @@ -189,7 +191,7 @@ def train(): print("\nPass: %d, Batch: %d, TrainCost: %f" % ( event.pass_id, event.batch_id + 1, cost_sum / cost_counter)) cost_sum, cost_counter = 0.0, 0 - with gzip.open("params.tar.gz", 'w') as f: + with gzip.open("checkpoints/params.latest.tar.gz", 'w') as f: parameters.to_tar(f) else: sys.stdout.write('.') @@ -202,6 +204,9 @@ def train(): reader=test_batch_reader, feeding=test_generator.feeding) print("\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % (time.time() - start_time, event.pass_id, result.cost)) + with gzip.open("checkpoints/params.pass-%d.tar.gz" % event.pass_id, + 'w') as f: + parameters.to_tar(f) # run train trainer.train( From 6d6cdf40576dff0086e221a3d5e761530e24f811 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 26 Jun 2017 13:04:36 +0800 Subject: [PATCH 068/335] Refine SoundFile installation process. 1. Install libsndfile first. 2. Install SoundFile using pip. --- requirements.txt | 1 - setup.sh | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0183ecf01..79272e7e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -SoundFile==0.9.0.post1 wget==3.2 scipy==0.13.1 diff --git a/setup.sh b/setup.sh index 1ae2a5eee..a801a0b2a 100644 --- a/setup.sh +++ b/setup.sh @@ -1,7 +1,7 @@ #!/bin/bash # install python dependencies -if [ -f 'requirements.txt' ]; then +if [ -f "requirements.txt" ]; then pip install -r requirements.txt fi if [ $? != 0 ]; then @@ -9,21 +9,21 @@ if [ $? != 0 ]; then exit 1 fi -# install scikits.samplerate -curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz" +# install package Soundfile +curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" if [ $? != 0 ]; then - echo "Download libsamplerate-0.1.9.tar.gz failed !!!" + echo "Download libsndfile-1.0.28.tar.gz failed !!!" exit 1 fi -tar -xvf libsamplerate-0.1.9.tar.gz -cd libsamplerate-0.1.9 +tar -zxvf libsndfile-1.0.28.tar.gz +cd libsndfile-1.0.28 ./configure && make && make install cd - -rm -rf libsamplerate-0.1.9 -rm libsamplerate-0.1.9.tar.gz -pip install scikits.samplerate==0.3.3 +rm -rf libsndfile-1.0.28 +rm libsndfile-1.0.28.tar.gz +pip install SoundFile==0.9.0.post1 if [ $? != 0 ]; then - echo "Install scikits.samplerate failed !!!" + echo "Install SoundFile failed !!!" exit 1 fi From cdd52ac2706929ea993038aedce3080eb2de8af8 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 26 Jun 2017 14:17:22 +0800 Subject: [PATCH 069/335] Fix a missing abs bug for DS2 AudioSegment. 
--- data_utils/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 1faeb48a3..d55fae1ef 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -378,7 +378,7 @@ class AudioSegment(object): :type shift_ms: float :raises ValueError: If shift_ms is longer than audio duration. """ - if shift_ms / 1000.0 > self.duration: + if abs(shift_ms) / 1000.0 > self.duration: raise ValueError("Absolute value of shift_ms should be smaller " "than audio duration.") shift_samples = int(shift_ms * self._sample_rate / 1000) From 0dadd14600dfae51995c75746ee0c237f83995d1 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 26 Jun 2017 19:19:16 +0800 Subject: [PATCH 070/335] Patch for adding missing shift_perturb.py in last commmit (pull request #114). --- data_utils/augmentor/shift_perturb.py | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 data_utils/augmentor/shift_perturb.py diff --git a/data_utils/augmentor/shift_perturb.py b/data_utils/augmentor/shift_perturb.py new file mode 100644 index 000000000..c4cbe3e17 --- /dev/null +++ b/data_utils/augmentor/shift_perturb.py @@ -0,0 +1,34 @@ +"""Contains the volume perturb augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase + + +class ShiftPerturbAugmentor(AugmentorBase): + """Augmentation model for adding random shift perturbation. + + :param rng: Random generator object. + :type rng: random.Random + :param min_shift_ms: Minimal shift in milliseconds. + :type min_shift_ms: float + :param max_shift_ms: Maximal shift in milliseconds. + :type max_shift_ms: float + """ + + def __init__(self, rng, min_shift_ms, max_shift_ms): + self._min_shift_ms = min_shift_ms + self._max_shift_ms = max_shift_ms + self._rng = rng + + def transform_audio(self, audio_segment): + """Shift audio. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. + :type audio_segment: AudioSegmenet|SpeechSegment + """ + shift_ms = self._rng.uniform(self._min_shift_ms, self._max_shift_ms) + audio_segment.shift(shift_ms) From 29f6ae08076d9811ab6aae91ffff3c0dfaf7bc85 Mon Sep 17 00:00:00 2001 From: xushaoyong Date: Tue, 27 Jun 2017 17:16:35 +0800 Subject: [PATCH 071/335] modify audio resample function --- data_utils/audio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 3d9b6c119..3891f5b92 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -332,7 +332,6 @@ class AudioSegment(object): 'kaiser_fast'}. 
:type filter: str """ - resample_ratio = target_sample_rate / self._sample_rate self._samples = resampy.resample( self.samples, self.sample_rate, target_sample_rate, filter=filter) self._sample_rate = target_sample_rate From 26510f74a63307786f83db3f9faa2f579292e1f4 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 27 Jun 2017 17:42:44 +0800 Subject: [PATCH 072/335] refine ctc_beam_search_decoder --- decoder.py | 128 +++++++++++++++++++---------------- evaluate.py | 89 +++++++++++------------- infer.py | 79 ++++++++------------- lm/__init__.py | 0 scorer.py => lm/lm_scorer.py | 21 +++--- lm/run.sh | 3 + requirements.txt | 1 + tests/test_decoders.py | 6 +- tune.py | 89 +++++++++--------------- 9 files changed, 187 insertions(+), 229 deletions(-) create mode 100644 lm/__init__.py rename scorer.py => lm/lm_scorer.py (73%) create mode 100644 lm/run.sh diff --git a/decoder.py b/decoder.py index 006593672..4676b02b7 100644 --- a/decoder.py +++ b/decoder.py @@ -8,8 +8,8 @@ import numpy as np import multiprocessing -def ctc_best_path_decode(probs_seq, vocabulary): - """Best path decoding, also called argmax decoding or greedy decoding. +def ctc_best_path_decoder(probs_seq, vocabulary): + """Best path decoder, also called argmax decoder or greedy decoder. Path consisting of the most probable tokens are further post-processed to remove consecutive repetitions and all blanks. @@ -40,73 +40,84 @@ def ctc_best_path_decode(probs_seq, vocabulary): def ctc_beam_search_decoder(probs_seq, beam_size, vocabulary, - blank_id=0, + blank_id, cutoff_prob=1.0, ext_scoring_func=None, nproc=False): - '''Beam search decoder for CTC-trained network, using beam search with width - beam_size to find many paths to one label, return beam_size labels in - the descending order of probabilities. The implementation is based on Prefix - Beam Search(https://arxiv.org/abs/1408.2873), and the unclear part is - redesigned. - - :param probs_seq: 2-D list with length num_time_steps, each element - is a list of normalized probabilities over vocabulary - and blank for one time step. + """Beam search decoder for CTC-trained network. It utilizes beam search + to approximately select top best decoding labels and returning results + in the descending order. The implementation is based on Prefix + Beam Search (https://arxiv.org/abs/1408.2873), and the unclear part is + redesigned. Two important modifications: 1) in the iterative computation + of probabilities, the assignment operation is changed to accumulation for + one prefix may comes from different paths; 2) the if condition "if l^+ not + in A_prev then" after probabilities' computation is deprecated for it is + hard to understand and seems unnecessary. + + :param probs_seq: 2-D list of probability distributions over each time + step, with each element being a list of normalized + probabilities over vocabulary and blank. :type probs_seq: 2-D list :param beam_size: Width for beam search. :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param blank_id: ID of blank, default 0. + :param blank_id: ID of blank. :type blank_id: int :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float - :param ext_scoring_func: External defined scoring function for + :param ext_scoring_func: External scoring function for partially decoded sentence, e.g. word count - and language model. - :type external_scoring_function: function + or language model. 
+ :type external_scoring_func: callable :param nproc: Whether the decoder used in multiprocesses. :type nproc: bool - :return: Decoding log probabilities and result sentences in descending order. + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. :rtype: list - ''' + """ # dimension check for prob_list in probs_seq: if not len(prob_list) == len(vocabulary) + 1: - raise ValueError("probs dimension mismatched with vocabulary") - num_time_steps = len(probs_seq) + raise ValueError("The shape of prob_seq does not match with the " + "shape of the vocabulary.") # blank_id check - probs_dim = len(probs_seq[0]) - if not blank_id < probs_dim: + if not blank_id < len(probs_seq[0]): raise ValueError("blank_id shouldn't be greater than probs dimension") # If the decoder called in the multiprocesses, then use the global scorer - # instantiated in ctc_beam_search_decoder_nproc(). + # instantiated in ctc_beam_search_decoder_batch(). if nproc is True: global ext_nproc_scorer ext_scoring_func = ext_nproc_scorer ## initialize - # the set containing selected prefixes - prefix_set_prev = {'\t': 1.0} - probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0} + # prefix_set_prev: the set containing selected prefixes + # probs_b_prev: prefixes' probability ending with blank in previous step + # probs_nb_prev: prefixes' probability ending with non-blank in previous step + prefix_set_prev, probs_b_prev, probs_nb_prev = { + '\t': 1.0 + }, { + '\t': 1.0 + }, { + '\t': 0.0 + } ## extend prefix in loop - for time_step in xrange(num_time_steps): - # the set containing candidate prefixes - prefix_set_next = {} - probs_b_cur, probs_nb_cur = {}, {} - prob = probs_seq[time_step] - prob_idx = [[i, prob[i]] for i in xrange(len(prob))] + for time_step in xrange(len(probs_seq)): + # prefix_set_next: the set containing candidate prefixes + # probs_b_cur: prefixes' probability ending with blank in current step + # probs_nb_cur: prefixes' probability ending with non-blank in current step + prefix_set_next, probs_b_cur, probs_nb_cur = {}, {}, {} + + prob_idx = list(enumerate(probs_seq[time_step])) cutoff_len = len(prob_idx) #If pruning is enabled - if (cutoff_prob < 1.0): + if cutoff_prob < 1.0: prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True) - cutoff_len = 0 - cum_prob = 0.0 + cutoff_len, cum_prob = 0, 0.0 for i in xrange(len(prob_idx)): cum_prob += prob_idx[i][1] cutoff_len += 1 @@ -162,54 +173,53 @@ def ctc_beam_search_decoder(probs_seq, prefix_set_prev = dict(prefix_set_prev) beam_result = [] - for (seq, prob) in prefix_set_prev.items(): + for seq, prob in prefix_set_prev.items(): if prob > 0.0 and len(seq) > 1: result = seq[1:] # score last word by external scorer if (ext_scoring_func is not None) and (result[-1] != ' '): prob = prob * ext_scoring_func(result) log_prob = np.log(prob) - beam_result.append([log_prob, result]) + beam_result.append((log_prob, result)) ## output top beam_size decoding results beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) return beam_result -def ctc_beam_search_decoder_nproc(probs_split, +def ctc_beam_search_decoder_batch(probs_split, beam_size, vocabulary, - blank_id=0, + blank_id, + num_processes, cutoff_prob=1.0, - ext_scoring_func=None, - num_processes=None): - '''Beam search decoder using multiple processes. + ext_scoring_func=None): + """CTC beam search decoder using multiple processes. 
- :param probs_seq: 3-D list with length batch_size, each element - is a 2-D list of probabilities can be used by - ctc_beam_search_decoder. + :param probs_seq: 3-D list with each element as an instance of 2-D list + of probabilities used by ctc_beam_search_decoder(). :type probs_seq: 3-D list :param beam_size: Width for beam search. :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param blank_id: ID of blank, default 0. + :param blank_id: ID of blank. :type blank_id: int + :param num_processes: Number of parallel processes. + :type num_processes: int :param cutoff_prob: Cutoff probability in pruning, - default 0, no pruning. + default 1.0, no pruning. + :param num_processes: Number of parallel processes. + :type num_processes: int :type cutoff_prob: float - :param ext_scoring_func: External defined scoring function for + :param ext_scoring_func: External scoring function for partially decoded sentence, e.g. word count - and language model. - :type external_scoring_function: function - :param num_processes: Number of processes, default None, equal to the - number of CPUs. - :type num_processes: int - :return: Decoding log probabilities and result sentences in descending order. + or language model. + :type external_scoring_function: callable + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. :rtype: list - ''' - if num_processes is None: - num_processes = multiprocessing.cpu_count() + """ if not num_processes > 0: raise ValueError("Number of processes must be positive!") @@ -227,7 +237,5 @@ def ctc_beam_search_decoder_nproc(probs_split, pool.close() pool.join() - beam_search_results = [] - for result in results: - beam_search_results.append(result.get()) + beam_search_results = [result.get() for result in results] return beam_search_results diff --git a/evaluate.py b/evaluate.py index a7b8e2216..7ef32ad12 100644 --- a/evaluate.py +++ b/evaluate.py @@ -3,22 +3,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import paddle.v2 as paddle import distutils.util import argparse import gzip +import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from decoder import * -from scorer import Scorer +from lm.lm_scorer import LmScorer from error_rate import wer parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--num_samples", + "--batch_size", default=100, type=int, - help="Number of samples for evaluation. (default: %(default)s)") + help="Minibatch size for evaluation. (default: %(default)s)") parser.add_argument( "--num_conv_layers", default=2, @@ -39,6 +39,16 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--num_threads_data", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu threads for preprocessing data. (default: %(default)s)") +parser.add_argument( + "--num_processes_beam_search", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -46,10 +56,10 @@ parser.add_argument( help="Manifest path for normalizer. 
(default: %(default)s)") parser.add_argument( "--decode_method", - default='beam_search_nproc', + default='beam_search', type=str, - help="Method for ctc decoding, best_path, " - "beam_search or beam_search_nproc. (default: %(default)s)") + help="Method for ctc decoding, best_path or beam_search. (default: %(default)s)" +) parser.add_argument( "--language_model_path", default="data/en.00.UNKNOWN.klm", @@ -76,11 +86,6 @@ parser.add_argument( default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--normalizer_manifest_path", - default='data/manifest.libri.train-clean-100', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", default='data/manifest.libri.test-clean', @@ -88,7 +93,7 @@ parser.add_argument( help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( "--model_filepath", - default='./params.tar.gz', + default='checkpoints/params.latest.tar.gz', type=str, help="Model filepath. (default: %(default)s)") parser.add_argument( @@ -101,12 +106,12 @@ args = parser.parse_args() def evaluate(): """Evaluate on whole test data for DeepSpeech2.""" - # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, - augmentation_config='{}') + augmentation_config='{}', + num_threads=args.num_threads_data) # create network config # paddle.data_type.dense_array is used for variable batch input. @@ -133,7 +138,7 @@ def evaluate(): # prepare infer data batch_reader = data_generator.batch_reader_creator( manifest_path=args.decode_manifest_path, - batch_size=args.num_samples, + batch_size=args.batch_size, sortagrad=False, shuffle_method=None) @@ -142,9 +147,8 @@ def evaluate(): output_layer=output_probs, parameters=parameters) # initialize external scorer for beam search decoding - if args.decode_method == 'beam_search' or \ - args.decode_method == 'beam_search_nproc': - ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + if args.decode_method == 'beam_search': + ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) wer_counter, wer_sum = 0, 0.0 for infer_data in batch_reader(): @@ -155,56 +159,39 @@ def evaluate(): infer_results[i * num_steps:(i + 1) * num_steps] for i in xrange(0, len(infer_data)) ] - + # target transcription + target_transcription = [ + ''.join([ + data_generator.vocab_list[index] for index in infer_data[i][1] + ]) for i, probs in enumerate(probs_split) + ] # decode and print # best path decode if args.decode_method == "best_path": for i, probs in enumerate(probs_split): - output_transcription = ctc_best_path_decode( + output_transcription = ctc_best_path_decoder( probs_seq=probs, vocabulary=data_generator.vocab_list) - target_transcription = ''.join([ - data_generator.vocab_list[index] - for index in infer_data[i][1] - ]) - wer_sum += wer(target_transcription, output_transcription) + wer_sum += wer(target_transcription[i], output_transcription) wer_counter += 1 - # beam search decode in single process + # beam search decode elif args.decode_method == "beam_search": - for i, probs in enumerate(probs_split): - target_transcription = ''.join([ - data_generator.vocab_list[index] - for index in infer_data[i][1] - ]) - beam_search_result = ctc_beam_search_decoder( - probs_seq=probs, - vocabulary=data_generator.vocab_list, - beam_size=args.beam_size, - blank_id=len(data_generator.vocab_list), - ext_scoring_func=ext_scorer, - 
cutoff_prob=args.cutoff_prob, ) - wer_sum += wer(target_transcription, beam_search_result[0][1]) - wer_counter += 1 - # beam search using multiple processes - elif args.decode_method == "beam_search_nproc": - beam_search_nproc_results = ctc_beam_search_decoder_nproc( + # beam search using multiple processes + beam_search_results = ctc_beam_search_decoder_batch( probs_split=probs_split, vocabulary=data_generator.vocab_list, beam_size=args.beam_size, blank_id=len(data_generator.vocab_list), + num_processes=args.num_processes_beam_search, ext_scoring_func=ext_scorer, cutoff_prob=args.cutoff_prob, ) - for i, beam_search_result in enumerate(beam_search_nproc_results): - target_transcription = ''.join([ - data_generator.vocab_list[index] - for index in infer_data[i][1] - ]) - wer_sum += wer(target_transcription, beam_search_result[0][1]) + for i, beam_search_result in enumerate(beam_search_results): + wer_sum += wer(target_transcription[i], + beam_search_result[0][1]) wer_counter += 1 else: raise ValueError("Decoding method [%s] is not supported." % decode_method) - print("Cur WER = %f" % (wer_sum / wer_counter)) print("Final WER = %f" % (wer_sum / wer_counter)) diff --git a/infer.py b/infer.py index 069b9e3e3..5f0f268a8 100644 --- a/infer.py +++ b/infer.py @@ -11,14 +11,14 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from decoder import * -from scorer import Scorer +from lm.lm_scorer import LmScorer from error_rate import wer import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=100, + default=10, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -46,6 +46,11 @@ parser.add_argument( default=multiprocessing.cpu_count(), type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") +parser.add_argument( + "--num_processes_beam_search", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -53,12 +58,12 @@ parser.add_argument( help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", - default='data/manifest.libri.test-100sample', + default='datasets/manifest.test', type=str, help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( "--model_filepath", - default='checkpoints/params.latest.tar.gz', + default='checkpoints/params.tar.gz.41', type=str, help="Model filepath. (default: %(default)s)") parser.add_argument( @@ -68,12 +73,10 @@ parser.add_argument( help="Vocabulary filepath. (default: %(default)s)") parser.add_argument( "--decode_method", - default='beam_search_nproc', + default='beam_search', type=str, - help="Method for ctc decoding:" - " best_path," - " beam_search, " - " or beam_search_nproc. (default: %(default)s)") + help="Method for ctc decoding: best_path or beam_search. (default: %(default)s)" +) parser.add_argument( "--beam_size", default=500, @@ -86,7 +89,7 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="data/en.00.UNKNOWN.klm", + default="lm/data/en.00.UNKNOWN.klm", type=str, help="Path for language model. 
(default: %(default)s)") parser.add_argument( @@ -143,6 +146,7 @@ def infer(): batch_reader = data_generator.batch_reader_creator( manifest_path=args.decode_manifest_path, batch_size=args.num_samples, + min_batch_size=1, sortagrad=False, shuffle_method=None) infer_data = batch_reader().next() @@ -156,68 +160,45 @@ def infer(): for i in xrange(len(infer_data)) ] + # targe transcription + target_transcription = [ + ''.join( + [data_generator.vocab_list[index] for index in infer_data[i][1]]) + for i, probs in enumerate(probs_split) + ] + ## decode and print # best path decode wer_sum, wer_counter = 0, 0 if args.decode_method == "best_path": for i, probs in enumerate(probs_split): - target_transcription = ''.join([ - data_generator.vocab_list[index] for index in infer_data[i][1] - ]) - best_path_transcription = ctc_best_path_decode( + best_path_transcription = ctc_best_path_decoder( probs_seq=probs, vocabulary=data_generator.vocab_list) print("\nTarget Transcription: %s\nOutput Transcription: %s" % - (target_transcription, best_path_transcription)) - wer_cur = wer(target_transcription, best_path_transcription) + (target_transcription[i], best_path_transcription)) + wer_cur = wer(target_transcription[i], best_path_transcription) wer_sum += wer_cur wer_counter += 1 print("cur wer = %f, average wer = %f" % (wer_cur, wer_sum / wer_counter)) # beam search decode elif args.decode_method == "beam_search": - ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) - for i, probs in enumerate(probs_split): - target_transcription = ''.join([ - data_generator.vocab_list[index] for index in infer_data[i][1] - ]) - beam_search_result = ctc_beam_search_decoder( - probs_seq=probs, - vocabulary=data_generator.vocab_list, - beam_size=args.beam_size, - blank_id=len(data_generator.vocab_list), - cutoff_prob=args.cutoff_prob, - ext_scoring_func=ext_scorer, ) - print("\nTarget Transcription:\t%s" % target_transcription) - - for index in xrange(args.num_results_per_sample): - result = beam_search_result[index] - #output: index, log prob, beam result - print("Beam %d: %f \t%s" % (index, result[0], result[1])) - wer_cur = wer(target_transcription, beam_search_result[0][1]) - wer_sum += wer_cur - wer_counter += 1 - print("cur wer = %f , average wer = %f" % - (wer_cur, wer_sum / wer_counter)) - elif args.decode_method == "beam_search_nproc": - ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) - beam_search_nproc_results = ctc_beam_search_decoder_nproc( + ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) + beam_search_batch_results = ctc_beam_search_decoder_batch( probs_split=probs_split, vocabulary=data_generator.vocab_list, beam_size=args.beam_size, blank_id=len(data_generator.vocab_list), + num_processes=args.num_processes_beam_search, cutoff_prob=args.cutoff_prob, ext_scoring_func=ext_scorer, ) - for i, beam_search_result in enumerate(beam_search_nproc_results): - target_transcription = ''.join([ - data_generator.vocab_list[index] for index in infer_data[i][1] - ]) - print("\nTarget Transcription:\t%s" % target_transcription) - + for i, beam_search_result in enumerate(beam_search_batch_results): + print("\nTarget Transcription:\t%s" % target_transcription[i]) for index in xrange(args.num_results_per_sample): result = beam_search_result[index] #output: index, log prob, beam result print("Beam %d: %f \t%s" % (index, result[0], result[1])) - wer_cur = wer(target_transcription, beam_search_result[0][1]) + wer_cur = wer(target_transcription[i], beam_search_result[0][1]) 
wer_sum += wer_cur wer_counter += 1 print("cur wer = %f , average wer = %f" % diff --git a/lm/__init__.py b/lm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scorer.py b/lm/lm_scorer.py similarity index 73% rename from scorer.py rename to lm/lm_scorer.py index 4f4684816..1c029e97f 100644 --- a/scorer.py +++ b/lm/lm_scorer.py @@ -8,13 +8,16 @@ import kenlm import numpy as np -class Scorer(object): - """External defined scorer to evaluate a sentence in beam search - decoding, consisting of language model and word count. +class LmScorer(object): + """External scorer to evaluate a prefix or whole sentence in + beam search decoding, including the score from n-gram language + model and word count. - :param alpha: Parameter associated with language model. + :param alpha: Parameter associated with language model. Don't use + language model when alpha = 0. :type alpha: float - :param beta: Parameter associated with word count. + :param beta: Parameter associated with word count. Don't use word + count when beta = 0. :type beta: float :model_path: Path to load language model. :type model_path: basestring @@ -28,14 +31,14 @@ class Scorer(object): self._language_model = kenlm.LanguageModel(model_path) # n-gram language model scoring - def language_model_score(self, sentence): + def _language_model_score(self, sentence): #log10 prob of last word log_cond_prob = list( self._language_model.full_scores(sentence, eos=False))[-1][0] return np.power(10, log_cond_prob) # word insertion term - def word_count(self, sentence): + def _word_count(self, sentence): words = sentence.strip().split(' ') return len(words) @@ -51,8 +54,8 @@ class Scorer(object): :return: Evaluation score, in the decimal or log. :rtype: float """ - lm = self.language_model_score(sentence) - word_cnt = self.word_count(sentence) + lm = self._language_model_score(sentence) + word_cnt = self._word_count(sentence) if log == False: score = np.power(lm, self._alpha) \ * np.power(word_cnt, self._beta) diff --git a/lm/run.sh b/lm/run.sh new file mode 100644 index 000000000..bf523740c --- /dev/null +++ b/lm/run.sh @@ -0,0 +1,3 @@ +echo "Downloading language model." 
+ +wget -c ftp://xxx/xxx/en.00.UNKNOWN.klm -P ./data diff --git a/requirements.txt b/requirements.txt index 0183ecf01..ce0245916 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ SoundFile==0.9.0.post1 wget==3.2 scipy==0.13.1 +https://github.com/kpu/kenlm/archive/master.zip diff --git a/tests/test_decoders.py b/tests/test_decoders.py index 7fa89c5f7..4435355cc 100644 --- a/tests/test_decoders.py +++ b/tests/test_decoders.py @@ -53,11 +53,11 @@ class TestDecoders(unittest.TestCase): self.beam_search_result = ['acdc', "b'a"] def test_best_path_decoder_1(self): - bst_result = ctc_best_path_decode(self.probs_seq1, self.vocab_list) + bst_result = ctc_best_path_decoder(self.probs_seq1, self.vocab_list) self.assertEqual(bst_result, self.best_path_result[0]) def test_best_path_decoder_2(self): - bst_result = ctc_best_path_decode(self.probs_seq2, self.vocab_list) + bst_result = ctc_best_path_decoder(self.probs_seq2, self.vocab_list) self.assertEqual(bst_result, self.best_path_result[1]) def test_beam_search_decoder_1(self): @@ -77,7 +77,7 @@ class TestDecoders(unittest.TestCase): self.assertEqual(beam_result[0][1], self.beam_search_result[1]) def test_beam_search_nproc_decoder(self): - beam_results = ctc_beam_search_decoder_nproc( + beam_results = ctc_beam_search_decoder_batch( probs_split=[self.probs_seq1, self.probs_seq2], beam_size=self.beam_size, vocabulary=self.vocab_list, diff --git a/tune.py b/tune.py index 020763494..9cea66b90 100644 --- a/tune.py +++ b/tune.py @@ -3,14 +3,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import paddle.v2 as paddle import distutils.util import argparse import gzip +import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from decoder import * -from scorer import Scorer +from lm.lm_scorer import LmScorer from error_rate import wer parser = argparse.ArgumentParser(description=__doc__) @@ -39,24 +39,29 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--num_threads_data", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu threads for preprocessing data. (default: %(default)s)") +parser.add_argument( + "--num_processes_beam_search", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', type=str, help="Manifest path for normalizer. (default: %(default)s)") -parser.add_argument( - "--normalizer_manifest_path", - default='data/manifest.libri.train-clean-100', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", - default='data/manifest.libri.test-100sample', + default='datasets/manifest.test', type=str, help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( "--model_filepath", - default='./params.tar.gz', + default='checkpoints/params.latest.tar.gz', type=str, help="Model filepath. (default: %(default)s)") parser.add_argument( @@ -64,25 +69,14 @@ parser.add_argument( default='datasets/vocab/eng_vocab.txt', type=str, help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--decode_method", - default='beam_search_nproc', - type=str, - help="Method for decoding, beam_search or beam_search_nproc. 
(default: %(default)s)" -) parser.add_argument( "--beam_size", default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--num_results_per_sample", - default=1, - type=int, - help="Number of outputs per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="data/en.00.UNKNOWN.klm", + default="lm/data/en.00.UNKNOWN.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( @@ -137,7 +131,8 @@ def tune(): data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, - augmentation_config='{}') + augmentation_config='{}', + num_threads=args.num_threads_data) # create network config # paddle.data_type.dense_array is used for variable batch input. @@ -188,42 +183,22 @@ def tune(): ## tune parameters in loop for (alpha, beta) in params_grid: wer_sum, wer_counter = 0, 0 - ext_scorer = Scorer(alpha, beta, args.language_model_path) - # beam search decode - if args.decode_method == "beam_search": - for i, probs in enumerate(probs_split): - target_transcription = ''.join([ - data_generator.vocab_list[index] - for index in infer_data[i][1] - ]) - beam_search_result = ctc_beam_search_decoder( - probs_seq=probs, - vocabulary=data_generator.vocab_list, - beam_size=args.beam_size, - blank_id=len(data_generator.vocab_list), - cutoff_prob=args.cutoff_prob, - ext_scoring_func=ext_scorer, ) - wer_sum += wer(target_transcription, beam_search_result[0][1]) - wer_counter += 1 + ext_scorer = LmScorer(alpha, beta, args.language_model_path) # beam search using multiple processes - elif args.decode_method == "beam_search_nproc": - beam_search_nproc_results = ctc_beam_search_decoder_nproc( - probs_split=probs_split, - vocabulary=data_generator.vocab_list, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - blank_id=len(data_generator.vocab_list), - ext_scoring_func=ext_scorer, ) - for i, beam_search_result in enumerate(beam_search_nproc_results): - target_transcription = ''.join([ - data_generator.vocab_list[index] - for index in infer_data[i][1] - ]) - wer_sum += wer(target_transcription, beam_search_result[0][1]) - wer_counter += 1 - else: - raise ValueError("Decoding method [%s] is not supported." 
% - decode_method) + beam_search_results = ctc_beam_search_decoder_batch( + probs_split=probs_split, + vocabulary=data_generator.vocab_list, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + blank_id=len(data_generator.vocab_list), + num_processes=args.num_processes_beam_search, + ext_scoring_func=ext_scorer, ) + for i, beam_search_result in enumerate(beam_search_results): + target_transcription = ''.join([ + data_generator.vocab_list[index] for index in infer_data[i][1] + ]) + wer_sum += wer(target_transcription, beam_search_result[0][1]) + wer_counter += 1 print("alpha = %f\tbeta = %f\tWER = %f" % (alpha, beta, wer_sum / wer_counter)) From e1e2914ec9f0972825c32c83531805ed458728d0 Mon Sep 17 00:00:00 2001 From: xushaoyong Date: Tue, 27 Jun 2017 18:06:16 +0800 Subject: [PATCH 073/335] remove augmentor unittest --- tests/test_augmentor.py | 65 ----------------------------------------- 1 file changed, 65 deletions(-) delete mode 100644 tests/test_augmentor.py diff --git a/tests/test_augmentor.py b/tests/test_augmentor.py deleted file mode 100644 index ee1f5439c..000000000 --- a/tests/test_augmentor.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Test augmentor class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import unittest -from data_utils import audio -from data_utils.augmentor.augmentation import AugmentationPipeline -import random -import numpy as np - -random_seed = 0 -#audio instance -audio_data = [3.0517571e-05, -8.54492188e-04, -1.09863281e-03, -9.4604492e-04,\ - -1.31225586e-03, -1.09863281e-03, -1.73950195e-03, -2.1057189e-03,\ - -2.04467773e-03, -1.46484375e-03, -1.43432617e-03, -9.4604492e-04,\ - -1.95312500e-03, -1.86157227e-03, -2.10571289e-03, -2.3193354e-03,\ - -2.01416016e-03, -2.62451172e-03, -2.07519531e-03, -2.3803719e-03] -audio_data = np.array(audio_data) -samplerate = 10 - - -class TestAugmentor(unittest.TestCase): - def test_volume(self): - config_json = '[{"type": "volume","params": {"min_gain_dBFS": -15, '\ - '"max_gain_dBFS": 15},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_speed(self): - config_json = '[{"type":"speed","params": {"min_speed_rate": 0.9,' \ - '"max_speed_rate": 1.1},"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - def test_resample(self): - config_json = '[{"type":"resample","params": {"new_sample_rate":5},'\ - '"prob": 1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, samplerate) - aug_pipeline.transform_audio(audio_seg) - self.assertTrue(audio_seg.sample_rate == 5) - - def test_bayesial(self): - config_json = '[{"type":"bayesian_normal","params":{"target_db":-20,' \ - '"prior_db":-4, "prior_samples": -8, "startup_delay": 0.0},"prob":1.0}]' - aug_pipeline = AugmentationPipeline( - augmentation_config=config_json, random_seed=random_seed) - audio_seg = audio.AudioSegment(audio_data, 
samplerate) - aug_pipeline.transform_audio(audio_seg) - orig_audio = audio.AudioSegment(audio_data, samplerate) - self.assertFalse(np.any(audio_seg.samples == orig_audio.samples)) - - -if __name__ == '__main__': - unittest.main() From 8ba98cb518d494a2f7a63a748cf7f8a82759c3bc Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 27 Jun 2017 18:35:49 +0800 Subject: [PATCH 074/335] fix decoders' unittest --- infer.py | 2 +- tests/test_decoders.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/infer.py b/infer.py index 5f0f268a8..686f2822c 100644 --- a/infer.py +++ b/infer.py @@ -63,7 +63,7 @@ parser.add_argument( help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( "--model_filepath", - default='checkpoints/params.tar.gz.41', + default='checkpoints/params.latest.tar.gz', type=str, help="Model filepath. (default: %(default)s)") parser.add_argument( diff --git a/tests/test_decoders.py b/tests/test_decoders.py index 4435355cc..a5e19b08b 100644 --- a/tests/test_decoders.py +++ b/tests/test_decoders.py @@ -81,7 +81,8 @@ class TestDecoders(unittest.TestCase): probs_split=[self.probs_seq1, self.probs_seq2], beam_size=self.beam_size, vocabulary=self.vocab_list, - blank_id=len(self.vocab_list)) + blank_id=len(self.vocab_list), + num_processes=24) self.assertEqual(beam_results[0][0][1], self.beam_search_result[0]) self.assertEqual(beam_results[1][0][1], self.beam_search_result[1]) From db37c34919e5cb7377e8ed863a17d206a0d28c39 Mon Sep 17 00:00:00 2001 From: xushaoyong Date: Tue, 27 Jun 2017 18:48:49 +0800 Subject: [PATCH 075/335] modify some detail of augmentor --- data_utils/augmentor/augmentation.py | 3 ++- data_utils/augmentor/speed_perturb.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index f8fd214a0..9dced4731 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -9,7 +9,8 @@ from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor from data_utils.augmentor.resample import ResampleAugmentor -from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor +from data_utils.augmentor.online_bayesian_normalization import \ + OnlineBayesianNormalizationAugmentor class AugmentationPipeline(object): diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py index 8c6c8b63c..cc5738bd1 100644 --- a/data_utils/augmentor/speed_perturb.py +++ b/data_utils/augmentor/speed_perturb.py @@ -15,10 +15,10 @@ class SpeedPerturbAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random :param min_speed_rate: Lower bound of new speed rate to sample and should - not below 0.9. + not be smaller than 0.9. :type min_speed_rate: float :param max_speed_rate: Upper bound of new speed rate to sample and should - not above 1.1. + not be larger than 1.1. 
:type max_speed_rate: float """ From aeccd9851b4d7137bcfa32ebf437298bfb9e478f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 27 Jun 2017 20:22:47 +0800 Subject: [PATCH 076/335] append README.md --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 2912ff314..41acf1020 100644 --- a/README.md +++ b/README.md @@ -77,3 +77,29 @@ More help for arguments: ``` python infer.py --help ``` + +### Evaluating + +``` +CUDA_VISIBLE_DEVICES=0 python evaluate.py +``` + +More help for arguments: + +``` +python evaluate.py --help +``` + +### Parameters tuning + +Parameters tuning for the CTC beam search decoder + +``` +CUDA_VISIBLE_DEVICES=0 python tune.py +``` + +More help for arguments: + +``` +python tune.py --help +``` From 2f15a7870754eca3da5da3cab649ed8c85ac0850 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 29 Jun 2017 10:05:02 +0800 Subject: [PATCH 077/335] add initial files for deployment --- deploy/ctc_beam_search_decoder.cpp | 143 +++++++++++++++++++++++++++++ deploy/ctc_beam_search_decoder.h | 19 ++++ deploy/ctc_beam_search_decoder.i | 22 +++++ deploy/decoder_setup.py | 58 ++++++++++++ deploy/scorer.cpp | 82 +++++++++++++++++ deploy/scorer.h | 22 +++++ deploy/scorer.i | 8 ++ deploy/scorer_setup.py | 54 +++++++++++ 8 files changed, 408 insertions(+) create mode 100644 deploy/ctc_beam_search_decoder.cpp create mode 100644 deploy/ctc_beam_search_decoder.h create mode 100644 deploy/ctc_beam_search_decoder.i create mode 100644 deploy/decoder_setup.py create mode 100644 deploy/scorer.cpp create mode 100644 deploy/scorer.h create mode 100644 deploy/scorer.i create mode 100644 deploy/scorer_setup.py diff --git a/deploy/ctc_beam_search_decoder.cpp b/deploy/ctc_beam_search_decoder.cpp new file mode 100644 index 000000000..297c7c24b --- /dev/null +++ b/deploy/ctc_beam_search_decoder.cpp @@ -0,0 +1,143 @@ +#include +#include +#include +#include +#include +#include "ctc_beam_search_decoder.h" + +template +bool pair_comp_first_rev(const std::pair a, const std::pair b) { + return a.first > b.first; +} + +template +bool pair_comp_second_rev(const std::pair a, const std::pair b) { + return a.second > b.second; +} + +/* CTC beam search decoder in C++, the interface is consistent with the original + decoder in Python version. 
+*/ +std::vector > + ctc_beam_search_decoder(std::vector > probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id, + double cutoff_prob, + Scorer *ext_scorer, + bool nproc + ) +{ + int num_time_steps = probs_seq.size(); + + // assign space ID + std::vector::iterator it = std::find(vocabulary.begin(), vocabulary.end(), " "); + int space_id = it-vocabulary.begin(); + if(space_id >= vocabulary.size()) { + std::cout<<"The character space is not in the vocabulary!"; + exit(1); + } + + // initialize + // two sets containing selected and candidate prefixes respectively + std::map prefix_set_prev, prefix_set_next; + // probability of prefixes ending with blank and non-blank + std::map probs_b_prev, probs_nb_prev; + std::map probs_b_cur, probs_nb_cur; + prefix_set_prev["\t"] = 1.0; + probs_b_prev["\t"] = 1.0; + probs_nb_prev["\t"] = 0.0; + + for (int time_step=0; time_step prob = probs_seq[time_step]; + + std::vector > prob_idx; + for (int i=0; i(i, prob[i])); + } + // pruning of vacobulary + if (cutoff_prob < 1.0) { + std::sort(prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); + float cum_prob = 0.0; + int cutoff_len = 0; + for (int i=0; i= cutoff_prob) break; + } + prob_idx = std::vector >(prob_idx.begin(), prob_idx.begin()+cutoff_len); + } + // extend prefix + for (std::map::iterator it = prefix_set_prev.begin(); + it != prefix_set_prev.end(); it++) { + std::string l = it->first; + if( prefix_set_next.find(l) == prefix_set_next.end()) { + probs_b_cur[l] = probs_nb_cur[l] = 0.0; + } + + for (int index=0; index 1) { + score = ext_scorer->get_score(l.substr(1)); + } + probs_nb_cur[l_plus] += score * prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]); + } else { + probs_nb_cur[l_plus] += prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]); + } + prefix_set_next[l_plus] = probs_nb_cur[l_plus]+probs_b_cur[l_plus]; + } + } + + prefix_set_next[l] = probs_b_cur[l]+probs_nb_cur[l]; + } + + probs_b_prev = probs_b_cur; + probs_nb_prev = probs_nb_cur; + std::vector > + prefix_vec_next(prefix_set_next.begin(), prefix_set_next.end()); + std::sort(prefix_vec_next.begin(), prefix_vec_next.end(), pair_comp_second_rev); + int k = beam_size + (prefix_vec_next.begin(), prefix_vec_next.begin()+k); + } + + // post processing + std::vector > beam_result; + for (std::map::iterator it = prefix_set_prev.begin(); + it != prefix_set_prev.end(); it++) { + if (it->second > 0.0 && it->first.size() > 1) { + double prob = it->second; + std::string sentence = it->first.substr(1); + // scoring the last word + if (ext_scorer != NULL && sentence[sentence.size()-1] != ' ') { + prob = prob * ext_scorer->get_score(sentence); + } + double log_prob = log(it->second); + beam_result.push_back(std::pair(log_prob, it->first)); + } + } + // sort the result and return + std::sort(beam_result.begin(), beam_result.end(), pair_comp_first_rev); + return beam_result; +} diff --git a/deploy/ctc_beam_search_decoder.h b/deploy/ctc_beam_search_decoder.h new file mode 100644 index 000000000..d23252ace --- /dev/null +++ b/deploy/ctc_beam_search_decoder.h @@ -0,0 +1,19 @@ +#ifndef CTC_BEAM_SEARCH_DECODER_H_ +#define CTC_BEAM_SEARCH_DECODER_H_ + +#include +#include +#include +#include "scorer.h" + +std::vector > + ctc_beam_search_decoder(std::vector > probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id=0, + double cutoff_prob=1.0, + Scorer *ext_scorer=NULL, + bool nproc=false + ); + +#endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/deploy/ctc_beam_search_decoder.i b/deploy/ctc_beam_search_decoder.i new file mode 100644 
index 000000000..09e893d38 --- /dev/null +++ b/deploy/ctc_beam_search_decoder.i @@ -0,0 +1,22 @@ +%module swig_ctc_beam_search_decoder +%{ +#include "ctc_beam_search_decoder.h" +%} + +%include "std_vector.i" +%include "std_pair.i" +%include "std_string.i" + +namespace std{ + %template(DoubleVector) std::vector; + %template(IntVector) std::vector; + %template(StringVector) std::vector; + %template(VectorOfStructVector) std::vector >; + %template(FloatVector) std::vector; + %template(Pair) std::pair; + %template(PairFloatStringVector) std::vector >; + %template(PairDoubleStringVector) std::vector >; +} + +%import scorer.h +%include "ctc_beam_search_decoder.h" diff --git a/deploy/decoder_setup.py b/deploy/decoder_setup.py new file mode 100644 index 000000000..5201172b1 --- /dev/null +++ b/deploy/decoder_setup.py @@ -0,0 +1,58 @@ +from setuptools import setup, Extension +import glob +import platform +import os + + +def compile_test(header, library): + dummy_path = os.path.join(os.path.dirname(__file__), "dummy") + command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\"" + return os.system(command) == 0 + + +FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob( + 'util/double-conversion/*.cc') +FILES = [ + fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) +] + +LIBS = ['stdc++'] +if platform.system() != 'Darwin': + LIBS.append('rt') + +ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6'] + +if compile_test('zlib.h', 'z'): + ARGS.append('-DHAVE_ZLIB') + LIBS.append('z') + +if compile_test('bzlib.h', 'bz2'): + ARGS.append('-DHAVE_BZLIB') + LIBS.append('bz2') + +if compile_test('lzma.h', 'lzma'): + ARGS.append('-DHAVE_XZLIB') + LIBS.append('lzma') + +os.system('swig -python -c++ ./ctc_beam_search_decoder.i') + +ctc_beam_search_decoder_module = [ + Extension( + name='_swig_ctc_beam_search_decoder', + sources=FILES + [ + 'scorer.cpp', 'ctc_beam_search_decoder_wrap.cxx', + 'ctc_beam_search_decoder.cpp' + ], + language='C++', + include_dirs=['.'], + libraries=LIBS, + extra_compile_args=ARGS) +] + +setup( + name='swig_ctc_beam_search_decoder', + version='0.1', + author='Yibing Liu', + description="""CTC beam search decoder""", + ext_modules=ctc_beam_search_decoder_module, + py_modules=['swig_ctc_beam_search_decoder'], ) diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp new file mode 100644 index 000000000..9cb680556 --- /dev/null +++ b/deploy/scorer.cpp @@ -0,0 +1,82 @@ +#include + +#include "scorer.h" +#include "lm/model.hh" +#include "util/tokenize_piece.hh" +#include "util/string_piece.hh" + +using namespace lm::ngram; + +Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { + this->_alpha = alpha; + this->_beta = beta; + this->_language_model = new Model(lm_model_path.c_str()); +} + +Scorer::~Scorer(){ + delete (Model *)this->_language_model; +} + +inline void strip(std::string &str, char ch=' ') { + if (str.size() == 0) return; + int start = 0; + int end = str.size()-1; + for (int i=0; i=0; i--) { + if (str[i] == ch) { + end --; + } else { + break; + } + } + + if (start == 0 && end == str.size()-1) return; + if (start > end) { + std::string emp_str; + str = emp_str; + } else { + str = str.substr(start, end-start+1); + } +} + +int Scorer::word_count(std::string sentence) { + strip(sentence); + int cnt = 0; + for (int i=0; i 0) cnt ++; + return cnt; +} + +double Scorer::language_model_score(std::string sentence) { + Model 
*model = (Model *)this->_language_model; + State state, out_state; + lm::FullScoreReturn ret; + state = model->BeginSentenceState(); + + for (util::TokenIter it(sentence, ' '); it; ++it){ + lm::WordIndex vocab = model->GetVocabulary().Index(*it); + ret = model->FullScore(state, vocab, out_state); + state = out_state; + } + double score = ret.prob; + + return pow(10, score); +} + +double Scorer::get_score(std::string sentence) { + double lm_score = language_model_score(sentence); + int word_cnt = word_count(sentence); + + double final_score = pow(lm_score, _alpha) * pow(word_cnt, _beta); + return final_score; +} diff --git a/deploy/scorer.h b/deploy/scorer.h new file mode 100644 index 000000000..47261bb51 --- /dev/null +++ b/deploy/scorer.h @@ -0,0 +1,22 @@ +#ifndef SCORER_H_ +#define SCORER_H_ + +#include + + +class Scorer{ +private: + float _alpha; + float _beta; + void *_language_model; + +public: + Scorer(){} + Scorer(float alpha, float beta, std::string lm_model_path); + ~Scorer(); + int word_count(std::string); + double language_model_score(std::string); + double get_score(std::string); +}; + +#endif diff --git a/deploy/scorer.i b/deploy/scorer.i new file mode 100644 index 000000000..8380e15a6 --- /dev/null +++ b/deploy/scorer.i @@ -0,0 +1,8 @@ +%module swig_scorer +%{ +#include "scorer.h" +%} + +%include "std_string.i" + +%include "scorer.h" diff --git a/deploy/scorer_setup.py b/deploy/scorer_setup.py new file mode 100644 index 000000000..c0006e071 --- /dev/null +++ b/deploy/scorer_setup.py @@ -0,0 +1,54 @@ +from setuptools import setup, Extension +import glob +import platform +import os + + +def compile_test(header, library): + dummy_path = os.path.join(os.path.dirname(__file__), "dummy") + command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\"" + return os.system(command) == 0 + + +FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob( + 'util/double-conversion/*.cc') +FILES = [ + fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) +] + +LIBS = ['stdc++'] +if platform.system() != 'Darwin': + LIBS.append('rt') + +ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6'] + +if compile_test('zlib.h', 'z'): + ARGS.append('-DHAVE_ZLIB') + LIBS.append('z') + +if compile_test('bzlib.h', 'bz2'): + ARGS.append('-DHAVE_BZLIB') + LIBS.append('bz2') + +if compile_test('lzma.h', 'lzma'): + ARGS.append('-DHAVE_XZLIB') + LIBS.append('lzma') + +os.system('swig -python -c++ ./scorer.i') + +ext_modules = [ + Extension( + name='_swig_scorer', + sources=FILES + ['scorer_wrap.cxx', 'scorer.cpp'], + language='C++', + include_dirs=['.'], + libraries=LIBS, + extra_compile_args=ARGS) +] + +setup( + name='swig_scorer', + version='0.1', + ext_modules=ext_modules, + include_package_data=True, + py_modules=['swig_scorer'], ) From 7c7e17e24954c74292b2fa5320d460bde964f028 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 29 Jun 2017 11:19:39 +0800 Subject: [PATCH 078/335] add deploy.py --- deploy.py | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 deploy.py diff --git a/deploy.py b/deploy.py new file mode 100644 index 000000000..3272371bf --- /dev/null +++ b/deploy.py @@ -0,0 +1,194 @@ +"""Deployment for DeepSpeech2 model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import gzip +import distutils.util 
+import multiprocessing +import paddle.v2 as paddle +from data_utils.data import DataGenerator +from model import deep_speech2 +from swig_ctc_beam_search_decoder import * +from swig_scorer import Scorer +from error_rate import wer +import utils + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--num_samples", + default=100, + type=int, + help="Number of samples for inference. (default: %(default)s)") +parser.add_argument( + "--num_conv_layers", + default=2, + type=int, + help="Convolution layer number. (default: %(default)s)") +parser.add_argument( + "--num_rnn_layers", + default=3, + type=int, + help="RNN layer number. (default: %(default)s)") +parser.add_argument( + "--rnn_layer_size", + default=512, + type=int, + help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gpu", + default=True, + type=distutils.util.strtobool, + help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--num_threads_data", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu threads for preprocessing data. (default: %(default)s)") +parser.add_argument( + "--mean_std_filepath", + default='mean_std.npz', + type=str, + help="Manifest path for normalizer. (default: %(default)s)") +parser.add_argument( + "--decode_manifest_path", + default='datasets/manifest.test', + type=str, + help="Manifest path for decoding. (default: %(default)s)") +parser.add_argument( + "--model_filepath", + default='ds2_new_models_0628/params.pass-51.tar.gz', + type=str, + help="Model filepath. (default: %(default)s)") +parser.add_argument( + "--vocab_filepath", + default='datasets/vocab/eng_vocab.txt', + type=str, + help="Vocabulary filepath. (default: %(default)s)") +parser.add_argument( + "--decode_method", + default='beam_search', + type=str, + help="Method for ctc decoding: best_path or beam_search. (default: %(default)s)" +) +parser.add_argument( + "--beam_size", + default=500, + type=int, + help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--num_results_per_sample", + default=1, + type=int, + help="Number of output per sample in beam search. (default: %(default)d)") +parser.add_argument( + "--language_model_path", + default="lm/data/en.00.UNKNOWN.klm", + type=str, + help="Path for language model. (default: %(default)s)") +parser.add_argument( + "--alpha", + default=0.26, + type=float, + help="Parameter associated with language model. (default: %(default)f)") +parser.add_argument( + "--beta", + default=0.1, + type=float, + help="Parameter associated with word count. (default: %(default)f)") +parser.add_argument( + "--cutoff_prob", + default=0.99, + type=float, + help="The cutoff probability of pruning" + "in beam search. (default: %(default)f)") +args = parser.parse_args() + + +def infer(): + """Deployment for DeepSpeech2.""" + # initialize data generator + data_generator = DataGenerator( + vocab_filepath=args.vocab_filepath, + mean_std_filepath=args.mean_std_filepath, + augmentation_config='{}', + num_threads=args.num_threads_data) + + # create network config + # paddle.data_type.dense_array is used for variable batch input. + # The size 161 * 161 is only an placeholder value and the real shape + # of input batch data will be induced during training. 
+ audio_data = paddle.layer.data( + name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) + output_probs = deep_speech2( + audio_data=audio_data, + text_data=text_data, + dict_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size, + is_inference=True) + + # load parameters + parameters = paddle.parameters.Parameters.from_tar( + gzip.open(args.model_filepath)) + + # prepare infer data + batch_reader = data_generator.batch_reader_creator( + manifest_path=args.decode_manifest_path, + batch_size=args.num_samples, + min_batch_size=1, + sortagrad=False, + shuffle_method=None) + infer_data = batch_reader().next() + + # run inference + infer_results = paddle.infer( + output_layer=output_probs, parameters=parameters, input=infer_data) + num_steps = len(infer_results) // len(infer_data) + probs_split = [ + infer_results[i * num_steps:(i + 1) * num_steps] + for i in xrange(len(infer_data)) + ] + + # targe transcription + target_transcription = [ + ''.join( + [data_generator.vocab_list[index] for index in infer_data[i][1]]) + for i, probs in enumerate(probs_split) + ] + + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + ## decode and print + + wer_sum, wer_counter = 0, 0 + for i, probs in enumerate(probs_split): + beam_result = ctc_beam_search_decoder( + probs.tolist(), + args.beam_size, + data_generator.vocab_list, + len(data_generator.vocab_list), + args.cutoff_prob, + ext_scorer, ) + + print("\nTarget Transcription:\t%s" % target_transcription[i]) + print("Beam %d: %f \t%s" % (0, beam_result[0][0], beam_result[0][1])) + wer_cur = wer(target_transcription[i], beam_result[0][1]) + wer_sum += wer_cur + wer_counter += 1 + print("cur wer = %f , average wer = %f" % + (wer_cur, wer_sum / wer_counter)) + + +def main(): + utils.print_arguments(args) + paddle.init(use_gpu=args.use_gpu, trainer_count=1) + infer() + + +if __name__ == '__main__': + main() From 3c77d369ca97c97dc982ca20f27a14869d498f6f Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Mon, 3 Jul 2017 15:14:23 +0800 Subject: [PATCH 079/335] Make ds2 run on paddle cloud 1. Refine data_utils/data.py to read bytes from tar file 2. 
Add scripts to submit paddle cloud job for ds2 training
---
 data_utils/data.py                  | 58 ++++++++++++++++++++++++-----
 datasets/librispeech/pcloud_data.py | 51 +++++++++++++++++++++++++
 pcloud_split_data.py                | 47 +++++++++++++++++++++++
 pcloud_submit.sh                    | 13 +++++++
 pcloud_train.sh                     | 32 ++++++++++++++++
 5 files changed, 191 insertions(+), 10 deletions(-)
 create mode 100644 datasets/librispeech/pcloud_data.py
 create mode 100644 pcloud_split_data.py
 create mode 100644 pcloud_submit.sh
 create mode 100644 pcloud_train.sh

diff --git a/data_utils/data.py b/data_utils/data.py
index d01ca8cc7..e1fa47477 100644
--- a/data_utils/data.py
+++ b/data_utils/data.py
@@ -8,13 +8,20 @@ from __future__ import print_function
 import random
 import numpy as np
 import multiprocessing
+from threading import local
 import paddle.v2 as paddle
+import tarfile
 from data_utils import utils
 from data_utils.augmentor.augmentation import AugmentationPipeline
 from data_utils.featurizer.speech_featurizer import SpeechFeaturizer
 from data_utils.speech import SpeechSegment
 from data_utils.normalizer import FeatureNormalizer
+# for caching tar files info
+local_data = local()
+local_data.tar2info = {}
+local_data.tar2object = {}
+


 class DataGenerator(object):
     """
@@ -45,9 +52,6 @@ class DataGenerator(object):
     :types max_freq: None|float
     :param specgram_type: Specgram feature type. Options: 'linear'.
     :type specgram_type: str
-    :param use_dB_normalization: Whether to normalize the audio to -20 dB
-                                 before extracting the features.
-    :type use_dB_normalization: bool
     :param num_threads: Number of CPU threads for processing data.
     :type num_threads: int
     :param random_seed: Random seed.
@@ -64,7 +68,6 @@ class DataGenerator(object):
                  window_ms=20.0,
                  max_freq=None,
                  specgram_type='linear',
-                 use_dB_normalization=True,
                  num_threads=multiprocessing.cpu_count(),
                  random_seed=0):
         self._max_duration = max_duration
@@ -77,12 +80,15 @@ class DataGenerator(object):
             specgram_type=specgram_type,
             stride_ms=stride_ms,
             window_ms=window_ms,
-            max_freq=max_freq,
-            use_dB_normalization=use_dB_normalization)
+            max_freq=max_freq)
         self._num_threads = num_threads
         self._rng = random.Random(random_seed)
         self._epoch = 0
+        # for caching tar files info
+        self.tar2info = {}
+        self.tar2object = {}
+

     def batch_reader_creator(self,
                              manifest_path,
                              batch_size,
@@ -94,7 +100,7 @@ class DataGenerator(object):
         """
         Batch data reader creator for audio data. Return a callable generator
         function to produce batches of data.
-
+
         Audio features within one batch will be padded with zeros to have
         the same shape, or a user-defined shape.
@@ -174,9 +180,9 @@ class DataGenerator(object):
     @property
     def feeding(self):
         """Returns data reader's feeding dict.
-
+
         :return: Data feeding dict.
-        :rtype: dict
+        :rtype: dict
         """
         return {"audio_spectrogram": 0, "transcript_text": 1}

@@ -198,9 +204,41 @@ class DataGenerator(object):
         """
         return self._speech_featurizer.vocab_list

+    def _parse_tar(self, file):
+        """
+        Parse a tar file to get a tarfile object and a map containing tar infos
+        """
+        result = {}
+        f = tarfile.open(file)
+        for tarinfo in f.getmembers():
+            result[tarinfo.name] = tarinfo
+        return f, result
+
+    def _read_soundbytes(self, filepath):
+        """
+        Read bytes from file.
+        If filepath starts with 'tar:', we will read bytes from the tar file
+        and cache the tar file info for the next reading request.
+ """ + if filepath.startswith('tar:'): + tarpath, filename = filepath.split(':', 1)[1].split('#', 1) + if 'tar2info' not in local_data.__dict__: + local_data.tar2info = {} + if 'tar2object' not in local_data.__dict__: + local_data.tar2object = {} + if tarpath not in local_data.tar2info: + object, infoes = self._parse_tar(tarpath) + local_data.tar2info[tarpath] = infoes + local_data.tar2object[tarpath] = object + return local_data.tar2object[tarpath].extractfile( + local_data.tar2info[tarpath][filename]).read() + else: + return open(filepath).read() + def _process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data.""" - speech_segment = SpeechSegment.from_file(filename, transcript) + speech_segment = SpeechSegment.from_bytes( + self._read_soundbytes(filename), transcript) self._augmentation_pipeline.transform_audio(speech_segment) specgram, text_ids = self._speech_featurizer.featurize(speech_segment) specgram = self._normalizer.apply(specgram) diff --git a/datasets/librispeech/pcloud_data.py b/datasets/librispeech/pcloud_data.py new file mode 100644 index 000000000..914001144 --- /dev/null +++ b/datasets/librispeech/pcloud_data.py @@ -0,0 +1,51 @@ +import json +import os +import tarfile +import sys +import argparse + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--manifest_path", + default="/manifest.train", + type=str, + help="Manifest of target data. (default: %(default)s)") +parser.add_argument( + "--out_tar_path", + default="/dev.tar", + type=str, + help="Output tar file path. (default: %(default)s)") +parser.add_argument( + "--out_manifest_path", + default="/dev.mani", + type=str, + help="Manifest of output data. (default: %(default)s)") +args = parser.parse_args() + + +def gen_pcloud_data(manifest_path, out_tar_path, out_manifest_path): + ''' + 1. According manifest, tar sound files into out_tar_path + 2. 
Generate a new manifest for output tar file + ''' + out_tar = tarfile.open(out_tar_path, 'w') + manifest = [] + for json_line in open(manifest_path): + try: + json_data = json.loads(json_line) + except Exception as e: + raise IOError("Error reading manifest: %s" % str(e)) + sound_file = json_data['audio_filepath'] + filename = os.path.basename(sound_file) + out_tar.add(sound_file, arcname=filename) + json_data['audio_filepath'] = filename + manifest.append("%s\n" % json.dumps(json_data)) + with open(out_manifest_path, 'w') as out_manifest: + out_manifest.writelines(manifest) + out_manifest.close() + out_tar.close() + + +if __name__ == '__main__': + gen_pcloud_data(args.manifest_path, args.out_tar_path, + args.out_manifest_path) diff --git a/pcloud_split_data.py b/pcloud_split_data.py new file mode 100644 index 000000000..bf35383af --- /dev/null +++ b/pcloud_split_data.py @@ -0,0 +1,47 @@ +import os +import json +import argparse + + +def split_data(inManifest, tar_path, outManifest): + trainer_id = 1 + trainer_count = 2 + #with open("/trainer_id", "r") as f: + # trainer_id = int(f.readline()[:-1]) + #with open("/trainer_count", "r") as f: + # trainer_count = int(f.readline()[:-1]) + + tarPath = os.path.abspath(tar_path) + result = [] + for index, json_line in enumerate(open(inManifest)): + if (index % trainer_count) == trainer_id: + json_data = json.loads(json_line) + json_data['audio_filepath'] = "tar:%s#%s" % ( + tarPath, json_data['audio_filepath']) + result.append("%s\n" % json.dumps(json_data)) + with open(outManifest, 'w') as manifest: + manifest.writelines(result) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + + parser.add_argument( + "--in_manifest_path", + default='datasets/dev.mani', + type=str, + help="Input manifest path. (default: %(default)s)") + parser.add_argument( + "--data_tar_path", + default='datasets/dev.tar', + type=str, + help="Data tar file path. (default: %(default)s)") + parser.add_argument( + "--out_manifest_path", + default='datasets/dev.mani.split', + type=str, + help="Out manifest file path. 
(default: %(default)s)") + args = parser.parse_args() + + split_data(args.in_manifest_path, args.data_tar_path, + args.out_manifest_path) diff --git a/pcloud_submit.sh b/pcloud_submit.sh new file mode 100644 index 000000000..06e65110d --- /dev/null +++ b/pcloud_submit.sh @@ -0,0 +1,13 @@ +paddlecloud submit \ +-image wanghaoshuang/pcloud_ds2 \ +-jobname ds23 \ +-cpu 1 \ +-gpu 0 \ +-memory 10Gi \ +-parallelism 1 \ +-pscpu 1 \ +-pservers 1 \ +-psmemory 10Gi \ +-passes 1 \ +-entry "sh pcloud_train.sh" \ +./deep_speech_2 diff --git a/pcloud_train.sh b/pcloud_train.sh new file mode 100644 index 000000000..fb6cbb9ec --- /dev/null +++ b/pcloud_train.sh @@ -0,0 +1,32 @@ +#setted by user +TRAIN_MANI='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.mani' +#setted by user +DEV_MANI='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.mani' +#setted by user +TRAIN_TAR='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.tar' +#setted by user +DEV_TAR='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.tar' +#setted by user +VOCAB_PATH='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/eng_vocab.txt' +#setted by user +MEAN_STD_FILE='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/mean_std.npz' + +# split train data for each pcloud node +python pcloud_split_data.py \ +--in_manifest_path=$TRAIN_MANI \ +--data_tar_path=$TRAIN_TAR \ +--out_manifest_path='./train.mani' +# split dev data for each pcloud node +python pcloud_split_data.py \ +--in_manifest_path=$DEV_MANI \ +--data_tar_path=$DEV_TAR \ +--out_manifest_path='./dev.mani' + +python train.py \ +--use_gpu=0 \ +--trainer_count=4 \ +--batch_size=2 \ +--mean_std_filepath=$MEAN_STD_FILE \ +--train_manifest_path='./train.mani' \ +--dev_manifest_path='./dev.mani' \ +--vocab_filepath=$VOCAB_PATH \ From d9d9514269298eec7f1f3abd54f54b401c1c525c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 4 Jul 2017 19:15:34 +0800 Subject: [PATCH 080/335] fix bugs --- deploy.py | 5 +++-- deploy/ctc_beam_search_decoder.cpp | 28 ++++++++++++++-------------- deploy/scorer.cpp | 14 +++++++------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/deploy.py b/deploy.py index 3272371bf..d8a7e5b27 100644 --- a/deploy.py +++ b/deploy.py @@ -58,7 +58,7 @@ parser.add_argument( help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( "--model_filepath", - default='ds2_new_models_0628/params.pass-51.tar.gz', + default='checkpoints/params.latest.tar.gz', type=str, help="Model filepath. (default: %(default)s)") parser.add_argument( @@ -162,9 +162,10 @@ def infer(): for i, probs in enumerate(probs_split) ] + # external scorer ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) - ## decode and print + ## decode and print wer_sum, wer_counter = 0, 0 for i, probs in enumerate(probs_split): beam_result = ctc_beam_search_decoder( diff --git a/deploy/ctc_beam_search_decoder.cpp b/deploy/ctc_beam_search_decoder.cpp index 297c7c24b..68d1a8457 100644 --- a/deploy/ctc_beam_search_decoder.cpp +++ b/deploy/ctc_beam_search_decoder.cpp @@ -15,10 +15,10 @@ bool pair_comp_second_rev(const std::pair a, const std::pair b) return a.second > b.second; } -/* CTC beam search decoder in C++, the interface is consistent with the original +/* CTC beam search decoder in C++, the interface is consistent with the original decoder in Python version. 
*/ -std::vector > +std::vector > ctc_beam_search_decoder(std::vector > probs_seq, int beam_size, std::vector vocabulary, @@ -29,15 +29,15 @@ std::vector > ) { int num_time_steps = probs_seq.size(); - - // assign space ID + + // assign space ID std::vector::iterator it = std::find(vocabulary.begin(), vocabulary.end(), " "); int space_id = it-vocabulary.begin(); if(space_id >= vocabulary.size()) { std::cout<<"The character space is not in the vocabulary!"; - exit(1); + exit(1); } - + // initialize // two sets containing selected and candidate prefixes respectively std::map prefix_set_prev, prefix_set_next; @@ -47,7 +47,7 @@ std::vector > prefix_set_prev["\t"] = 1.0; probs_b_prev["\t"] = 1.0; probs_nb_prev["\t"] = 0.0; - + for (int time_step=0; time_step > } prob_idx = std::vector >(prob_idx.begin(), prob_idx.begin()+cutoff_len); } - // extend prefix - for (std::map::iterator it = prefix_set_prev.begin(); + // extend prefix + for (std::map::iterator it = prefix_set_prev.begin(); it != prefix_set_prev.end(); it++) { std::string l = it->first; if( prefix_set_next.find(l) == prefix_set_next.end()) { @@ -109,12 +109,12 @@ std::vector > } } - prefix_set_next[l] = probs_b_cur[l]+probs_nb_cur[l]; + prefix_set_next[l] = probs_b_cur[l]+probs_nb_cur[l]; } probs_b_prev = probs_b_cur; probs_nb_prev = probs_nb_cur; - std::vector > + std::vector > prefix_vec_next(prefix_set_next.begin(), prefix_set_next.end()); std::sort(prefix_vec_next.begin(), prefix_vec_next.end(), pair_comp_second_rev); int k = beam_size > // post processing std::vector > beam_result; - for (std::map::iterator it = prefix_set_prev.begin(); + for (std::map::iterator it = prefix_set_prev.begin(); it != prefix_set_prev.end(); it++) { if (it->second > 0.0 && it->first.size() > 1) { double prob = it->second; @@ -133,8 +133,8 @@ std::vector > if (ext_scorer != NULL && sentence[sentence.size()-1] != ' ') { prob = prob * ext_scorer->get_score(sentence); } - double log_prob = log(it->second); - beam_result.push_back(std::pair(log_prob, it->first)); + double log_prob = log(prob); + beam_result.push_back(std::pair(log_prob, sentence)); } } // sort the result and return diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 9cb680556..d7f68d71f 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -35,7 +35,7 @@ inline void strip(std::string &str, char ch=' ') { break; } } - + if (start == 0 && end == str.size()-1) return; if (start > end) { std::string emp_str; @@ -47,13 +47,12 @@ inline void strip(std::string &str, char ch=' ') { int Scorer::word_count(std::string sentence) { strip(sentence); - int cnt = 0; + int cnt = 1; for (int i=0; i 0) cnt ++; return cnt; } @@ -68,15 +67,16 @@ double Scorer::language_model_score(std::string sentence) { ret = model->FullScore(state, vocab, out_state); state = out_state; } - double score = ret.prob; - - return pow(10, score); + //log10 prob + double log_prob = ret.prob; + + return log_prob; } double Scorer::get_score(std::string sentence) { double lm_score = language_model_score(sentence); int word_cnt = word_count(sentence); - double final_score = pow(lm_score, _alpha) * pow(word_cnt, _beta); + double final_score = pow(10, _alpha*lm_score) * pow(word_cnt, _beta); return final_score; } From 37e98df74df04bd266913ec1b2665f696a8ba1ca Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 4 Jul 2017 19:48:56 +0800 Subject: [PATCH 081/335] enable resetting params in scorer --- lm/lm_scorer.py | 5 +++++ tests/test_decoders.py | 2 +- tune.py | 8 ++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff 
--git a/lm/lm_scorer.py b/lm/lm_scorer.py index 1c029e97f..de41754f9 100644 --- a/lm/lm_scorer.py +++ b/lm/lm_scorer.py @@ -42,6 +42,11 @@ class LmScorer(object): words = sentence.strip().split(' ') return len(words) + # reset alpha and beta + def reset_params(self, alpha, beta): + self._alpha = alpha + self._beta = beta + # execute evaluation def __call__(self, sentence, log=False): """Evaluation function, gathering all the different scores diff --git a/tests/test_decoders.py b/tests/test_decoders.py index a5e19b08b..99d8a8289 100644 --- a/tests/test_decoders.py +++ b/tests/test_decoders.py @@ -76,7 +76,7 @@ class TestDecoders(unittest.TestCase): blank_id=len(self.vocab_list)) self.assertEqual(beam_result[0][1], self.beam_search_result[1]) - def test_beam_search_nproc_decoder(self): + def test_beam_search_decoder_batch(self): beam_results = ctc_beam_search_decoder_batch( probs_split=[self.probs_seq1, self.probs_seq2], beam_size=self.beam_size, diff --git a/tune.py b/tune.py index 9cea66b90..e26bc45ce 100644 --- a/tune.py +++ b/tune.py @@ -12,6 +12,7 @@ from model import deep_speech2 from decoder import * from lm.lm_scorer import LmScorer from error_rate import wer +import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -180,10 +181,13 @@ def tune(): params_grid = [(alpha, beta) for alpha in cand_alphas for beta in cand_betas] + ext_scorer = LmScorer(args.alpha_from, args.beta_from, + args.language_model_path) ## tune parameters in loop - for (alpha, beta) in params_grid: + for alpha, beta in params_grid: wer_sum, wer_counter = 0, 0 - ext_scorer = LmScorer(alpha, beta, args.language_model_path) + # reset scorer + ext_scorer.reset_params(alpha, beta) # beam search using multiple processes beam_search_results = ctc_beam_search_decoder_batch( probs_split=probs_split, From 0cadc56a8417c9f1f0d3e21d3fec4363cf0b00a2 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 5 Jul 2017 11:05:26 +0800 Subject: [PATCH 082/335] follow comments in code format --- decoder.py | 12 ++++-------- evaluate.py | 4 ++-- infer.py | 2 +- lm/lm_scorer.py | 6 ++---- tune.py | 2 +- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/decoder.py b/decoder.py index 4676b02b7..a1fadc2c8 100644 --- a/decoder.py +++ b/decoder.py @@ -5,6 +5,7 @@ from __future__ import print_function from itertools import groupby import numpy as np +from math import log import multiprocessing @@ -97,13 +98,8 @@ def ctc_beam_search_decoder(probs_seq, # prefix_set_prev: the set containing selected prefixes # probs_b_prev: prefixes' probability ending with blank in previous step # probs_nb_prev: prefixes' probability ending with non-blank in previous step - prefix_set_prev, probs_b_prev, probs_nb_prev = { - '\t': 1.0 - }, { - '\t': 1.0 - }, { - '\t': 0.0 - } + prefix_set_prev = {'\t': 1.0} + probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0} ## extend prefix in loop for time_step in xrange(len(probs_seq)): @@ -179,7 +175,7 @@ def ctc_beam_search_decoder(probs_seq, # score last word by external scorer if (ext_scoring_func is not None) and (result[-1] != ' '): prob = prob * ext_scoring_func(result) - log_prob = np.log(prob) + log_prob = log(prob) beam_result.append((log_prob, result)) ## output top beam_size decoding results diff --git a/evaluate.py b/evaluate.py index 7ef32ad12..a4f2a690a 100644 --- a/evaluate.py +++ b/evaluate.py @@ -62,7 +62,7 @@ parser.add_argument( ) parser.add_argument( "--language_model_path", - default="data/en.00.UNKNOWN.klm", + default="lm/data/1Billion.klm", type=str, 
help="Path for language model. (default: %(default)s)") parser.add_argument( @@ -88,7 +88,7 @@ parser.add_argument( help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( "--decode_manifest_path", - default='data/manifest.libri.test-clean', + default='datasets/manifest.test', type=str, help="Manifest path for decoding. (default: %(default)s)") parser.add_argument( diff --git a/infer.py b/infer.py index 686f2822c..dc1430804 100644 --- a/infer.py +++ b/infer.py @@ -89,7 +89,7 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="lm/data/en.00.UNKNOWN.klm", + default="lm/data/1Billion.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( diff --git a/lm/lm_scorer.py b/lm/lm_scorer.py index de41754f9..463e96d66 100644 --- a/lm/lm_scorer.py +++ b/lm/lm_scorer.py @@ -62,9 +62,7 @@ class LmScorer(object): lm = self._language_model_score(sentence) word_cnt = self._word_count(sentence) if log == False: - score = np.power(lm, self._alpha) \ - * np.power(word_cnt, self._beta) + score = np.power(lm, self._alpha) * np.power(word_cnt, self._beta) else: - score = self._alpha * np.log(lm) \ - + self._beta * np.log(word_cnt) + score = self._alpha * np.log(lm) + self._beta * np.log(word_cnt) return score diff --git a/tune.py b/tune.py index e26bc45ce..4e9e268fb 100644 --- a/tune.py +++ b/tune.py @@ -77,7 +77,7 @@ parser.add_argument( help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="lm/data/en.00.UNKNOWN.klm", + default="lm/data/1Billion.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( From 94a68116601a7be2490a5c48dbe4b73c5d7605b5 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 6 Jul 2017 11:25:05 +0800 Subject: [PATCH 083/335] code cleanup for the deployment decoder --- deploy/ctc_beam_search_decoder.cpp | 72 +++++++++++++++++++----------- deploy/ctc_beam_search_decoder.h | 34 ++++++++++---- deploy/decoder_setup.py | 7 ++- deploy/scorer.cpp | 14 +++++- deploy/scorer.h | 20 +++++++-- deploy/scorer_setup.py | 6 +-- 6 files changed, 105 insertions(+), 48 deletions(-) diff --git a/deploy/ctc_beam_search_decoder.cpp b/deploy/ctc_beam_search_decoder.cpp index 68d1a8457..a684b30a6 100644 --- a/deploy/ctc_beam_search_decoder.cpp +++ b/deploy/ctc_beam_search_decoder.cpp @@ -6,35 +6,47 @@ #include "ctc_beam_search_decoder.h" template -bool pair_comp_first_rev(const std::pair a, const std::pair b) { +bool pair_comp_first_rev(const std::pair a, const std::pair b) +{ return a.first > b.first; } template -bool pair_comp_second_rev(const std::pair a, const std::pair b) { +bool pair_comp_second_rev(const std::pair a, const std::pair b) +{ return a.second > b.second; } -/* CTC beam search decoder in C++, the interface is consistent with the original - decoder in Python version. 
-*/ std::vector > - ctc_beam_search_decoder(std::vector > probs_seq, - int beam_size, - std::vector vocabulary, - int blank_id, - double cutoff_prob, - Scorer *ext_scorer, - bool nproc - ) -{ + ctc_beam_search_decoder(std::vector > probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id, + double cutoff_prob, + Scorer *ext_scorer, + bool nproc) { + // dimension check int num_time_steps = probs_seq.size(); + for (int i=0; i vocabulary.size()) { + std::cout<<"Invalid blank_id!"<::iterator it = std::find(vocabulary.begin(), vocabulary.end(), " "); - int space_id = it-vocabulary.begin(); + std::vector::iterator it = std::find(vocabulary.begin(), + vocabulary.end(), " "); + int space_id = it - vocabulary.begin(); if(space_id >= vocabulary.size()) { - std::cout<<"The character space is not in the vocabulary!"; + std::cout<<"The character space is not in the vocabulary!"< > } // pruning of vacobulary if (cutoff_prob < 1.0) { - std::sort(prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); + std::sort(prob_idx.begin(), prob_idx.end(), + pair_comp_second_rev); float cum_prob = 0.0; int cutoff_len = 0; for (int i=0; i > cutoff_len += 1; if (cum_prob >= cutoff_prob) break; } - prob_idx = std::vector >(prob_idx.begin(), prob_idx.begin()+cutoff_len); + prob_idx = std::vector >( prob_idx.begin(), + prob_idx.begin() + cutoff_len); } // extend prefix for (std::map::iterator it = prefix_set_prev.begin(); @@ -82,11 +96,11 @@ std::vector > int c = prob_idx[index].first; double prob_c = prob_idx[index].second; if (c == blank_id) { - probs_b_cur[l] += prob_c*(probs_b_prev[l]+probs_nb_prev[l]); + probs_b_cur[l] += prob_c * (probs_b_prev[l] + probs_nb_prev[l]); } else { std::string last_char = l.substr(l.size()-1, 1); std::string new_char = vocabulary[c]; - std::string l_plus = l+new_char; + std::string l_plus = l + new_char; if( prefix_set_next.find(l_plus) == prefix_set_next.end()) { probs_b_cur[l_plus] = probs_nb_cur[l_plus] = 0.0; @@ -105,19 +119,22 @@ std::vector > probs_nb_cur[l_plus] += prob_c * ( probs_b_prev[l] + probs_nb_prev[l]); } - prefix_set_next[l_plus] = probs_nb_cur[l_plus]+probs_b_cur[l_plus]; + prefix_set_next[l_plus] = probs_nb_cur[l_plus] + probs_b_cur[l_plus]; } } - prefix_set_next[l] = probs_b_cur[l]+probs_nb_cur[l]; + prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l]; } probs_b_prev = probs_b_cur; probs_nb_prev = probs_nb_cur; std::vector > - prefix_vec_next(prefix_set_next.begin(), prefix_set_next.end()); - std::sort(prefix_vec_next.begin(), prefix_vec_next.end(), pair_comp_second_rev); - int k = beam_size); + int k = beam_size (prefix_vec_next.begin(), prefix_vec_next.begin()+k); } @@ -138,6 +155,7 @@ std::vector > } } // sort the result and return - std::sort(beam_result.begin(), beam_result.end(), pair_comp_first_rev); + std::sort(beam_result.begin(), beam_result.end(), + pair_comp_first_rev); return beam_result; } diff --git a/deploy/ctc_beam_search_decoder.h b/deploy/ctc_beam_search_decoder.h index d23252ace..a4bb6aa74 100644 --- a/deploy/ctc_beam_search_decoder.h +++ b/deploy/ctc_beam_search_decoder.h @@ -6,14 +6,30 @@ #include #include "scorer.h" -std::vector > - ctc_beam_search_decoder(std::vector > probs_seq, - int beam_size, - std::vector vocabulary, - int blank_id=0, - double cutoff_prob=1.0, - Scorer *ext_scorer=NULL, - bool nproc=false - ); +/* CTC Beam Search Decoder, the interface is consistent with the + * original decoder in Python version. 
+ + * Parameters: + * probs_seq: 2-D vector that each element is a vector of probabilities + * over vocabulary of one time step. + * beam_size: The width of beam search. + * vocabulary: A vector of vocabulary. + * blank_id: ID of blank. + * cutoff_prob: Cutoff probability of pruning + * ext_scorer: External scorer to evaluate a prefix. + * nproc: Whether this function used in multiprocessing. + * Return: + * A vector that each element is a pair of score and decoding result, + * in desending order. +*/ +std::vector > + ctc_beam_search_decoder(std::vector > probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id, + double cutoff_prob=1.0, + Scorer *ext_scorer=NULL, + bool nproc=false + ); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/deploy/decoder_setup.py b/deploy/decoder_setup.py index 5201172b1..4ed603b25 100644 --- a/deploy/decoder_setup.py +++ b/deploy/decoder_setup.py @@ -10,8 +10,8 @@ def compile_test(header, library): return os.system(command) == 0 -FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob( - 'util/double-conversion/*.cc') +FILES = glob.glob('kenlm/util/*.cc') + glob.glob('kenlm/lm/*.cc') + glob.glob( + 'kenlm/util/double-conversion/*.cc') FILES = [ fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) ] @@ -44,7 +44,7 @@ ctc_beam_search_decoder_module = [ 'ctc_beam_search_decoder.cpp' ], language='C++', - include_dirs=['.'], + include_dirs=['.', './kenlm'], libraries=LIBS, extra_compile_args=ARGS) ] @@ -52,7 +52,6 @@ ctc_beam_search_decoder_module = [ setup( name='swig_ctc_beam_search_decoder', version='0.1', - author='Yibing Liu', description="""CTC beam search decoder""", ext_modules=ctc_beam_search_decoder_module, py_modules=['swig_ctc_beam_search_decoder'], ) diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index d7f68d71f..1b843402b 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -1,5 +1,4 @@ #include - #include "scorer.h" #include "lm/model.hh" #include "util/tokenize_piece.hh" @@ -17,6 +16,13 @@ Scorer::~Scorer(){ delete (Model *)this->_language_model; } +/* Strip a input sentence + * Parameters: + * str: A reference to the objective string + * ch: The character to prune + * Return: + * void + */ inline void strip(std::string &str, char ch=' ') { if (str.size() == 0) return; int start = 0; @@ -69,10 +75,14 @@ double Scorer::language_model_score(std::string sentence) { } //log10 prob double log_prob = ret.prob; - return log_prob; } +void Scorer::reset_params(float alpha, float beta) { + this->_alpha = alpha; + this->_beta = beta; +} + double Scorer::get_score(std::string sentence) { double lm_score = language_model_score(sentence); int word_cnt = word_count(sentence); diff --git a/deploy/scorer.h b/deploy/scorer.h index 47261bb51..7b305772c 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -3,20 +3,34 @@ #include +/* External scorer to evaluate a prefix or a complete sentence + * when a new word appended during decoding, consisting of word + * count and language model scoring. 
+ * Example: + * Scorer ext_scorer(alpha, beta, "path_to_language_model.klm"); + * double score = ext_scorer.get_score("sentence_to_score"); + */ class Scorer{ private: float _alpha; float _beta; void *_language_model; + // word insertion term + int word_count(std::string); + // n-gram language model scoring + double language_model_score(std::string); + public: Scorer(){} Scorer(float alpha, float beta, std::string lm_model_path); ~Scorer(); - int word_count(std::string); - double language_model_score(std::string); + + // reset params alpha & beta + void reset_params(float alpha, float beta); + // get the final score double get_score(std::string); }; -#endif +#endif //SCORER_H_ diff --git a/deploy/scorer_setup.py b/deploy/scorer_setup.py index c0006e071..3bb582724 100644 --- a/deploy/scorer_setup.py +++ b/deploy/scorer_setup.py @@ -10,8 +10,8 @@ def compile_test(header, library): return os.system(command) == 0 -FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob( - 'util/double-conversion/*.cc') +FILES = glob.glob('kenlm/util/*.cc') + glob.glob('kenlm/lm/*.cc') + glob.glob( + 'kenlm/util/double-conversion/*.cc') FILES = [ fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) ] @@ -41,7 +41,7 @@ ext_modules = [ name='_swig_scorer', sources=FILES + ['scorer_wrap.cxx', 'scorer.cpp'], language='C++', - include_dirs=['.'], + include_dirs=['.', './kenlm'], libraries=LIBS, extra_compile_args=ARGS) ] From 5bfa066920d326460a7f3ba2ccb67a5bb5a89787 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 6 Jul 2017 12:18:09 +0800 Subject: [PATCH 084/335] add setup and README for deployment --- deploy/README.md | 38 ++++++++++++++++++++++++++++++++++++++ deploy/setup.sh | 11 +++++++++++ 2 files changed, 49 insertions(+) create mode 100644 deploy/README.md create mode 100644 deploy/setup.sh diff --git a/deploy/README.md b/deploy/README.md new file mode 100644 index 000000000..c8dbd1c12 --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,38 @@ +### Installation +The setup of the decoder for deployment depends on the source code of [kenlm](https://github.com/kpu/kenlm/), first clone it to current directory (i.e., `deep_speech_2/deploy`) + +```shell +git clone https://github.com/kpu/kenlm.git +``` + +Then run the setup + +```shell +sh setup.sh +``` + +After the installation succeeds, go back to the parent directory + +``` +cd .. +``` + +### Deployment + +For GPU deployment + +``` +CUDA_VISIBLE_DEVICES=0 python deploy.py +``` + +For CPU deployment + +``` +python deploy.py --use_gpu=False +``` + +More help for arguments + +``` +python deploy.py --help +``` diff --git a/deploy/setup.sh b/deploy/setup.sh new file mode 100644 index 000000000..e84cd9235 --- /dev/null +++ b/deploy/setup.sh @@ -0,0 +1,11 @@ +echo "Run decoder setup ..." + +python decoder_setup.py install +rm -r ./build + +echo "\nRun scorer setup ..." + +python scorer_setup.py install +rm -r ./build + +echo "\nFinish the installation of decoder and scorer." 
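A minimal sketch of driving the deployment decoder and scorer from Python, assuming the SWIG modules built by `deploy/setup.sh` import as `swig_ctc_beam_search_decoder` and `swig_scorer` (as `deploy.py` does), that the language model from `lm/run.sh` has been downloaded, and that the SWIG typemaps accept plain Python lists. The vocabulary, probabilities and alpha/beta values below are toy placeholders, not values taken from the repository:

```python
# Toy inputs: the vocabulary excludes the blank; each probability row has
# len(vocab_list) + 1 entries, with the blank appended last.
vocab_list = [' ', 'a', 'b', 'c']
probs_seq = [
    [0.1, 0.6, 0.1, 0.1, 0.1],
    [0.1, 0.1, 0.6, 0.1, 0.1],
    [0.1, 0.1, 0.1, 0.1, 0.6],
]

from swig_ctc_beam_search_decoder import ctc_beam_search_decoder
from swig_scorer import Scorer

# alpha/beta are placeholder values here; tune them with tune.py first.
ext_scorer = Scorer(0.26, 0.1, "lm/data/common_crawl_00.prune01111.trie.klm")

beam_result = ctc_beam_search_decoder(
    probs_seq=probs_seq,        # time_steps x (len(vocab_list) + 1)
    beam_size=200,
    vocabulary=vocab_list,
    blank_id=len(vocab_list),
    cutoff_prob=1.0,            # 1.0 keeps the whole vocabulary (no pruning)
    ext_scorer=ext_scorer)

# Results are (log probability, sentence) pairs in descending order,
# so the most probable transcription is the first entry.
best_transcription = beam_result[0][1]
```

With real acoustic-model outputs this mirrors the loop in `deploy.py`; with the toy inputs above the decoded text is meaningless and only the call signature matters.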
From d15c48d616b299f7a91ad71a49673cc2eeb7ab56 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 11 Jul 2017 13:32:35 +0800 Subject: [PATCH 085/335] upload the language model --- README.md | 38 ++++++++++++++++++++++++++++++++++++-- evaluate.py | 3 ++- infer.py | 2 +- lm/run.sh | 20 ++++++++++++++++++-- tune.py | 2 +- 5 files changed, 58 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 41acf1020..48f4b0db7 100644 --- a/README.md +++ b/README.md @@ -66,12 +66,36 @@ More help for arguments: python train.py --help ``` -### Inferencing +### Preparing language model + +The following steps, inference, parameters tuning and evaluating, will require a language model during decoding. +A compressed language model is provided and can be accessed by + +``` +cd ./lm +sh run.sh +``` + +After the downloading is completed, then + +``` +cd .. +``` + +### Inference + +For GPU inference ``` CUDA_VISIBLE_DEVICES=0 python infer.py ``` +For CPU inference + +``` +python infer.py --use_gpu=False +``` + More help for arguments: ``` @@ -92,14 +116,24 @@ python evaluate.py --help ### Parameters tuning -Parameters tuning for the CTC beam search decoder +Usually, the parameters $\alpha$ and $\beta$ for the CTC [prefix beam search](https://arxiv.org/abs/1408.2873) decoder need to be tuned after retraining the acoustic model. + +For GPU tuning ``` CUDA_VISIBLE_DEVICES=0 python tune.py ``` +For CPU tuning + +``` +python tune.py --use_gpu=False +``` + More help for arguments: ``` python tune.py --help ``` + +Then reset parameters with the tuning result before inference or evaluating. diff --git a/evaluate.py b/evaluate.py index a4f2a690a..00516dcbf 100644 --- a/evaluate.py +++ b/evaluate.py @@ -62,7 +62,7 @@ parser.add_argument( ) parser.add_argument( "--language_model_path", - default="lm/data/1Billion.klm", + default="lm/data/common_crawl_00.prune01111.trie.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( @@ -139,6 +139,7 @@ def evaluate(): batch_reader = data_generator.batch_reader_creator( manifest_path=args.decode_manifest_path, batch_size=args.batch_size, + min_batch_size=1, sortagrad=False, shuffle_method=None) diff --git a/infer.py b/infer.py index dc1430804..bb81feac1 100644 --- a/infer.py +++ b/infer.py @@ -89,7 +89,7 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="lm/data/1Billion.klm", + default="lm/data/common_crawl_00.prune01111.trie.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( diff --git a/lm/run.sh b/lm/run.sh index bf523740c..2108ea55f 100644 --- a/lm/run.sh +++ b/lm/run.sh @@ -1,3 +1,19 @@ -echo "Downloading language model." +echo "Downloading language model ..." + +mkdir data + +LM=common_crawl_00.prune01111.trie.klm +MD5="099a601759d467cd0a8523ff939819c5" + +wget -c http://paddlepaddle.bj.bcebos.com/model_zoo/speech/$LM -P ./data + +echo "Checking md5sum ..." +md5_tmp=`md5sum ./data/$LM | awk -F[' '] '{print $1}'` + +if [ $MD5 != $md5_tmp ]; then + echo "Fail to download the language model!" + exit 1 +fi + + -wget -c ftp://xxx/xxx/en.00.UNKNOWN.klm -P ./data diff --git a/tune.py b/tune.py index 4e9e268fb..19a2d5595 100644 --- a/tune.py +++ b/tune.py @@ -77,7 +77,7 @@ parser.add_argument( help="Width for beam search decoding. 
(default: %(default)d)") parser.add_argument( "--language_model_path", - default="lm/data/1Billion.klm", + default="lm/data/common_crawl_00.prune01111.trie.klm", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( From 8ce954671084c55a83e0008aee54395ab76c9670 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 11 Jul 2017 14:56:37 +0800 Subject: [PATCH 086/335] modify README.md --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 48f4b0db7..3b20bf494 100644 --- a/README.md +++ b/README.md @@ -74,11 +74,6 @@ A compressed language model is provided and can be accessed by ``` cd ./lm sh run.sh -``` - -After the downloading is completed, then - -``` cd .. ``` From ccea7c01503c2b15c5860bccf59ed9fa48f2c5e8 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 10 Jul 2017 11:34:47 +0800 Subject: [PATCH 087/335] enable loading language model in multiple format --- deploy.py | 6 +++++- deploy/scorer.cpp | 18 ++++++++++++------ deploy/setup.sh | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/deploy.py b/deploy.py index d8a7e5b27..02152b499 100644 --- a/deploy.py +++ b/deploy.py @@ -14,6 +14,7 @@ from swig_ctc_beam_search_decoder import * from swig_scorer import Scorer from error_rate import wer import utils +import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -74,7 +75,7 @@ parser.add_argument( ) parser.add_argument( "--beam_size", - default=500, + default=200, type=int, help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( @@ -166,6 +167,7 @@ def infer(): ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) ## decode and print + time_begin = time.time() wer_sum, wer_counter = 0, 0 for i, probs in enumerate(probs_split): beam_result = ctc_beam_search_decoder( @@ -183,6 +185,8 @@ def infer(): wer_counter += 1 print("cur wer = %f , average wer = %f" % (wer_cur, wer_sum / wer_counter)) + time_end = time.time() + print("total time = %f" % (time_end - time_begin)) def main(): diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 1b843402b..d438ec1bd 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -1,4 +1,5 @@ #include +#include #include "scorer.h" #include "lm/model.hh" #include "util/tokenize_piece.hh" @@ -9,11 +10,16 @@ using namespace lm::ngram; Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { this->_alpha = alpha; this->_beta = beta; - this->_language_model = new Model(lm_model_path.c_str()); + + if (access(lm_model_path.c_str(), F_OK) != 0) { + std::cout<<"Invalid language model path!"<_language_model = LoadVirtual(lm_model_path.c_str()); } Scorer::~Scorer(){ - delete (Model *)this->_language_model; + delete (lm::base::Model *)this->_language_model; } /* Strip a input sentence @@ -63,14 +69,14 @@ int Scorer::word_count(std::string sentence) { } double Scorer::language_model_score(std::string sentence) { - Model *model = (Model *)this->_language_model; + lm::base::Model *model = (lm::base::Model *)this->_language_model; State state, out_state; lm::FullScoreReturn ret; - state = model->BeginSentenceState(); + model->BeginSentenceWrite(&state); for (util::TokenIter it(sentence, ' '); it; ++it){ - lm::WordIndex vocab = model->GetVocabulary().Index(*it); - ret = model->FullScore(state, vocab, out_state); + lm::WordIndex wid = model->BaseVocabulary().Index(*it); + ret = model->BaseFullScore(&state, wid, &out_state); state = out_state; } //log10 prob diff --git a/deploy/setup.sh 
b/deploy/setup.sh index e84cd9235..423f5b892 100644 --- a/deploy/setup.sh +++ b/deploy/setup.sh @@ -3,9 +3,9 @@ echo "Run decoder setup ..." python decoder_setup.py install rm -r ./build -echo "\nRun scorer setup ..." +echo "Run scorer setup ..." python scorer_setup.py install rm -r ./build -echo "\nFinish the installation of decoder and scorer." +echo "Finish the installation of decoder and scorer." From ee5abbe37d5a3e1fd8629a55d4d149ab5612c740 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 18 Jul 2017 10:14:37 +0800 Subject: [PATCH 088/335] add mfcc feature for DS2 --- README.md | 6 ++- compute_mean_std.py | 8 +++- data_utils/featurizer/audio_featurizer.py | 48 ++++++++++++++++++++-- data_utils/featurizer/speech_featurizer.py | 15 +++---- data_utils/normalizer.py | 2 +- requirements.txt | 1 + train.py | 7 ++++ 7 files changed, 74 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 3b20bf494..a92b671cb 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,11 @@ python datasets/librispeech/librispeech.py --help python compute_mean_std.py ``` -`python compute_mean_std.py` computes mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. +`python compute_mean_std.py` computes mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. The default feature of audio data is power spectrum, currently the mfcc feature is also supported. To train and infer based on mfcc feature, you can regenerate this file by + +``` +python compute_mean_std.py --specgram_type mfcc +``` More help for arguments: diff --git a/compute_mean_std.py b/compute_mean_std.py index 9c301c93f..0cc84e730 100644 --- a/compute_mean_std.py +++ b/compute_mean_std.py @@ -10,6 +10,12 @@ from data_utils.featurizer.audio_featurizer import AudioFeaturizer parser = argparse.ArgumentParser( description='Computing mean and stddev for feature normalizer.') +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") parser.add_argument( "--manifest_path", default='datasets/manifest.train', @@ -39,7 +45,7 @@ args = parser.parse_args() def main(): augmentation_pipeline = AugmentationPipeline(args.augmentation_config) - audio_featurizer = AudioFeaturizer() + audio_featurizer = AudioFeaturizer(specgram_type=args.specgram_type) def augment_and_featurize(audio_segment): augmentation_pipeline.transform_audio(audio_segment) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 4b4d02c60..271e535b6 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -6,13 +6,15 @@ from __future__ import print_function import numpy as np from data_utils import utils from data_utils.audio import AudioSegment +from python_speech_features import mfcc +from python_speech_features import delta class AudioFeaturizer(object): """Audio featurizer, for extracting features from audio contents of AudioSegment or SpeechSegment. - Currently, it only supports feature type of linear spectrogram. + Currently, it supports feature types of linear spectrogram and mfcc. :param specgram_type: Specgram feature type. Options: 'linear'. 
:type specgram_type: str @@ -20,9 +22,10 @@ class AudioFeaturizer(object): :type stride_ms: float :param window_ms: Window size (in milliseconds) for generating frames. :type window_ms: float - :param max_freq: Used when specgram_type is 'linear', only FFT bins + :param max_freq: When specgram_type is 'linear', only FFT bins corresponding to frequencies between [0, max_freq] are - returned. + returned; when specgram_type is 'mfcc', max_feq is the + highest band edge of mel filters. :types max_freq: None|float :param target_sample_rate: Audio are resampled (if upsampling or downsampling is allowed) to this before @@ -91,6 +94,9 @@ class AudioFeaturizer(object): return self._compute_linear_specgram( samples, sample_rate, self._stride_ms, self._window_ms, self._max_freq) + elif self._specgram_type == 'mfcc': + return self._compute_mfcc(samples, sample_rate, self._stride_ms, + self._window_ms, self._max_freq) else: raise ValueError("Unknown specgram_type %s. " "Supported values: linear." % self._specgram_type) @@ -142,3 +148,39 @@ class AudioFeaturizer(object): # prepare fft frequency list freqs = float(sample_rate) / window_size * np.arange(fft.shape[0]) return fft, freqs + + def _compute_mfcc(self, + samples, + sample_rate, + stride_ms=10.0, + window_ms=20.0, + max_freq=None): + """Compute mfcc from samples.""" + if max_freq is None: + max_freq = sample_rate / 2 + if max_freq > sample_rate / 2: + raise ValueError("max_freq must be greater than half of " + "sample rate.") + if stride_ms > window_ms: + raise ValueError("Stride size must not be greater than " + "window size.") + # compute 13 cepstral coefficients, and the first one is replaced + # by log(frame energy) + mfcc_feat = mfcc( + signal=samples, + samplerate=sample_rate, + winlen=0.001 * window_ms, + winstep=0.001 * stride_ms, + highfreq=max_freq) + # Deltas + d_mfcc_feat = delta(mfcc_feat, 2) + # Deltas-Deltas + dd_mfcc_feat = delta(d_mfcc_feat, 2) + # concat above three features + concat_mfcc_feat = [ + np.concatenate((mfcc_feat[i], d_mfcc_feat[i], dd_mfcc_feat[i])) + for i in xrange(len(mfcc_feat)) + ] + # transpose to be consistent with the linear specgram situation + concat_mfcc_feat = np.transpose(concat_mfcc_feat) + return concat_mfcc_feat diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py index 26283892e..a947588db 100644 --- a/data_utils/featurizer/speech_featurizer.py +++ b/data_utils/featurizer/speech_featurizer.py @@ -11,23 +11,24 @@ class SpeechFeaturizer(object): """Speech featurizer, for extracting features from both audio and transcript contents of SpeechSegment. - Currently, for audio parts, it only supports feature type of linear - spectrogram; for transcript parts, it only supports char-level tokenizing - and conversion into a list of token indices. Note that the token indexing - order follows the given vocabulary file. + Currently, for audio parts, it supports feature types of linear + spectrogram and mfcc; for transcript parts, it only supports char-level + tokenizing and conversion into a list of token indices. Note that the + token indexing order follows the given vocabulary file. :param vocab_filepath: Filepath to load vocabulary for token indices conversion. :type specgram_type: basestring - :param specgram_type: Specgram feature type. Options: 'linear'. + :param specgram_type: Specgram feature type. Options: 'linear', 'mfcc'. :type specgram_type: str :param stride_ms: Striding size (in milliseconds) for generating frames. 
:type stride_ms: float :param window_ms: Window size (in milliseconds) for generating frames. :type window_ms: float - :param max_freq: Used when specgram_type is 'linear', only FFT bins + :param max_freq: When specgram_type is 'linear', only FFT bins corresponding to frequencies between [0, max_freq] are - returned. + returned; when specgram_type is 'mfcc', max_freq is the + highest band edge of mel filters. :types max_freq: None|float :param target_sample_rate: Speech are resampled (if upsampling or downsampling is allowed) to this before diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py index c123d25d2..1f4aae9a0 100644 --- a/data_utils/normalizer.py +++ b/data_utils/normalizer.py @@ -16,7 +16,7 @@ class FeatureNormalizer(object): if mean_std_filepath is provided (not None), the normalizer will directly initilize from the file. Otherwise, both manifest_path and featurize_func should be given for on-the-fly mean and stddev computing. - + :param mean_std_filepath: File containing the pre-computed mean and stddev. :type mean_std_filepath: None|basestring :param manifest_path: Manifest of instances for computing mean and stddev. diff --git a/requirements.txt b/requirements.txt index 2ae7d0895..721fa2811 100755 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ wget==3.2 scipy==0.13.1 resampy==0.1.5 https://github.com/kpu/kenlm/archive/master.zip +python_speech_features diff --git a/train.py b/train.py index 3a2d0cad9..6481074c6 100644 --- a/train.py +++ b/train.py @@ -53,6 +53,12 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use sortagrad or not. (default: %(default)s)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") parser.add_argument( "--max_duration", default=27.0, @@ -130,6 +136,7 @@ def train(): augmentation_config=args.augmentation_config, max_duration=args.max_duration, min_duration=args.min_duration, + specgram_type=args.specgram_type, num_threads=args.num_threads_data) train_generator = data_generator() From 724ef185966a379ceca0caa1d0b2200e42bf32f3 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 19 Jul 2017 22:40:01 +0800 Subject: [PATCH 089/335] update several scripts to support mfcc --- README.md | 2 ++ evaluate.py | 7 +++++++ infer.py | 7 +++++++ tune.py | 7 +++++++ 4 files changed, 23 insertions(+) diff --git a/README.md b/README.md index a92b671cb..24f0b3c3f 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ python compute_mean_std.py python compute_mean_std.py --specgram_type mfcc ``` +and specify the ```specgram_type``` to ```mfcc``` in each step, including training, inference etc. + More help for arguments: ``` diff --git a/evaluate.py b/evaluate.py index 00516dcbf..19eabf4e5 100644 --- a/evaluate.py +++ b/evaluate.py @@ -86,6 +86,12 @@ parser.add_argument( default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. 
(default: %(default)s)") parser.add_argument( "--decode_manifest_path", default='datasets/manifest.test', @@ -111,6 +117,7 @@ def evaluate(): vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', + specgram_type=args.specgram_type, num_threads=args.num_threads_data) # create network config diff --git a/infer.py b/infer.py index bb81feac1..817526302 100644 --- a/infer.py +++ b/infer.py @@ -51,6 +51,12 @@ parser.add_argument( default=multiprocessing.cpu_count(), type=int, help="Number of cpu processes for beam search. (default: %(default)s)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -118,6 +124,7 @@ def infer(): vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', + specgram_type=args.specgram_type, num_threads=args.num_threads_data) # create network config diff --git a/tune.py b/tune.py index 19a2d5595..2fcca4862 100644 --- a/tune.py +++ b/tune.py @@ -50,6 +50,12 @@ parser.add_argument( default=multiprocessing.cpu_count(), type=int, help="Number of cpu processes for beam search. (default: %(default)s)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -133,6 +139,7 @@ def tune(): vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', + specgram_type=args.specgram_type, num_threads=args.num_threads_data) # create network config From cb0680e8c49ffa23d2fb7857d1a3fd39d6e48ac1 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 20 Jul 2017 11:47:46 +0800 Subject: [PATCH 090/335] follow comments to modify README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 24f0b3c3f..3010c0e53 100644 --- a/README.md +++ b/README.md @@ -38,13 +38,13 @@ python datasets/librispeech/librispeech.py --help python compute_mean_std.py ``` -`python compute_mean_std.py` computes mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. The default feature of audio data is power spectrum, currently the mfcc feature is also supported. To train and infer based on mfcc feature, you can regenerate this file by +It will compute mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. The default feature of audio data is power spectrum, and the mfcc feature is also supported. To train and infer based on mfcc feature, please generate this file by ``` python compute_mean_std.py --specgram_type mfcc ``` -and specify the ```specgram_type``` to ```mfcc``` in each step, including training, inference etc. +and specify ```--specgram_type mfcc``` when running train.py, infer.py, evaluator.py or tune.py. 
More help for arguments: From c7676286ab99d3b30d1c79e44ee30e20d65bd302 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 26 Jul 2017 16:25:11 +0800 Subject: [PATCH 091/335] install libsndfile from /usr to thirdparty --- .gitignore | 1 + README.md | 3 --- requirements.txt | 3 ++- setup.sh | 13 +++++-------- 4 files changed, 8 insertions(+), 12 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..0e0f559f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +thirdparty diff --git a/README.md b/README.md index 3010c0e53..22d0c5386 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,6 @@ Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory sh setup.sh export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib:$LD_LIBRARY_PATH ``` - -For some machines, we also need to install libsndfile1. Details to be added. - ## Usage ### Preparing Data diff --git a/requirements.txt b/requirements.txt index 721fa2811..3f73ea8b8 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ wget==3.2 scipy==0.13.1 resampy==0.1.5 -https://github.com/kpu/kenlm/archive/master.zip +SoundFile==0.9.0.post1 python_speech_features +https://github.com/kpu/kenlm/archive/master.zip diff --git a/setup.sh b/setup.sh index 8cba91ecd..854f879e9 100644 --- a/setup.sh +++ b/setup.sh @@ -9,7 +9,9 @@ if [ $? != 0 ]; then exit 1 fi -# install package Soundfile +# install package libsndfile +DIR="$( cd "$(dirname "$0")" ; pwd -P )" +mkdir thirdparty curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" if [ $? != 0 ]; then echo "Download libsndfile-1.0.28.tar.gz failed !!!" @@ -17,15 +19,10 @@ if [ $? != 0 ]; then fi tar -zxvf libsndfile-1.0.28.tar.gz cd libsndfile-1.0.28 -./configure && make && make install -cd - +./configure --prefix=$DIR/thirdparty/libsndfile && make && make install +cd .. rm -rf libsndfile-1.0.28 rm libsndfile-1.0.28.tar.gz -pip install SoundFile==0.9.0.post1 -if [ $? != 0 ]; then - echo "Install SoundFile failed !!!" - exit 1 -fi # prepare ./checkpoints mkdir checkpoints From 9fa9a352ac46c2547fcedfa9def201e7ed06d760 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 27 Jul 2017 13:53:37 +0800 Subject: [PATCH 092/335] Refine submitting scripts for deepspeech2 on paddle cloud. --- cloud/README.md | 45 +++++++++++++++++ .../pcloud_prepare_data.py | 32 ++++++++---- cloud/pcloud_split_data.py | 50 +++++++++++++++++++ cloud/pcloud_submit.sh | 17 +++++++ cloud/pcloud_train.sh | 37 ++++++++++++++ data_utils/data.py | 13 +++-- pcloud_split_data.py | 47 ----------------- pcloud_submit.sh | 13 ----- pcloud_train.sh | 31 +++++++----- 9 files changed, 197 insertions(+), 88 deletions(-) create mode 100644 cloud/README.md rename datasets/librispeech/pcloud_data.py => cloud/pcloud_prepare_data.py (61%) create mode 100644 cloud/pcloud_split_data.py create mode 100644 cloud/pcloud_submit.sh create mode 100644 cloud/pcloud_train.sh delete mode 100644 pcloud_split_data.py delete mode 100644 pcloud_submit.sh diff --git a/cloud/README.md b/cloud/README.md new file mode 100644 index 000000000..91a1d52a7 --- /dev/null +++ b/cloud/README.md @@ -0,0 +1,45 @@ +#DeepSpeech2 on paddle cloud + +## Run DS2 by public data + +**Step1: ** Make sure current dir is `models/deep_speech_2/cloud/` + +**Step2:** Submit job by cmd: `sh pcloud_submit.sh` + +``` +$ sh pcloud_submit.sh +$ uploading: deepspeech.tar.gz... +$ uploading: pcloud_prepare_data.py... +$ uploading: pcloud_split_data.py... 
+$ uploading: pcloud_submit.sh... +$ uploading: pcloud_train.sh... +$ deepspeech20170727130129 submited. +``` +The we can get job name 'deepspeech20170727130129' at last line + +**Step3:** Get logs from paddle cloud by cmd: `paddlecloud logs -n 10000 deepspeech20170727130129`. + +``` +$ paddlecloud logs -n 10000 deepspeech20170727130129 +$ ==========================deepspeech20170727130129-trainer-6vk3m========================== +label selector: paddle-job-pserver=deepspeech20170727130129, desired: 1 +running pod list: [('Running', '10.1.3.6')] +label selector: paddle-job=deepspeech20170727130129, desired: 1 +running pod list: [('Running', '10.1.83.14')] +Starting training job: /pfs/dlnel/home/yanxu05@baidu.com/jobs/deepspeech20170727130129, num_gradient_servers: 1, trainer_id: 0, version: v2 +I0727 05:01:42.969719 25 Util.cpp:166] commandline: --num_gradient_servers=1 --ports_num_for_sparse=1 --use_gpu=1 --trainer_id=0 --pservers=10.1.3.6 --trainer_count=4 --num_passes=1 --ports_num=1 --port=7164 +[INFO 2017-07-27 05:01:50,279 layers.py:2430] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968 +[WARNING 2017-07-27 05:01:50,280 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better +[INFO 2017-07-27 05:01:50,283 layers.py:2430] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848 +[WARNING 2017-07-27 05:01:50,283 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better +[WARNING 2017-07-27 05:01:50,287 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better +[WARNING 2017-07-27 05:01:50,291 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better +[WARNING 2017-07-27 05:01:50,295 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better +I0727 05:01:50.316176 25 MultiGradientMachine.cpp:99] numLogicalDevices=1 numThreads=4 numDevices=4 +I0727 05:01:50.454787 25 GradientMachine.cpp:85] Initing parameters.. +I0727 05:01:50.690007 25 GradientMachine.cpp:92] Init parameters done. +``` +[More optins and cmd aoubt paddle cloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md) + +## Run DS2 by customize data +TODO diff --git a/datasets/librispeech/pcloud_data.py b/cloud/pcloud_prepare_data.py similarity index 61% rename from datasets/librispeech/pcloud_data.py rename to cloud/pcloud_prepare_data.py index 914001144..2ffdaf630 100644 --- a/datasets/librispeech/pcloud_data.py +++ b/cloud/pcloud_prepare_data.py @@ -1,23 +1,36 @@ +""" +This tool is used for preparing data for DeepSpeech2 trainning on paddle cloud. + +Steps: +1. Read original manifest and get the local path of sound files. +2. Tar all local sound files into one tar file. +3. Modify original manifest to remove the local path information. + +Finally, we will get a tar file and a manifest with sound file name, duration +and text. +""" import json import os import tarfile import sys import argparse +sys.path.append('../') +from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--manifest_path", - default="/manifest.train", + default="../datasets/manifest.train", type=str, help="Manifest of target data. (default: %(default)s)") parser.add_argument( "--out_tar_path", - default="/dev.tar", + default="./data/dev.tar", type=str, help="Output tar file path. 
(default: %(default)s)") parser.add_argument( "--out_manifest_path", - default="/dev.mani", + default="./data/dev.mani", type=str, help="Manifest of output data. (default: %(default)s)") args = parser.parse_args() @@ -29,19 +42,16 @@ def gen_pcloud_data(manifest_path, out_tar_path, out_manifest_path): 2. Generate a new manifest for output tar file ''' out_tar = tarfile.open(out_tar_path, 'w') - manifest = [] - for json_line in open(manifest_path): - try: - json_data = json.loads(json_line) - except Exception as e: - raise IOError("Error reading manifest: %s" % str(e)) + manifest = read_manifest(manifest_path) + results = [] + for json_data in manifest: sound_file = json_data['audio_filepath'] filename = os.path.basename(sound_file) out_tar.add(sound_file, arcname=filename) json_data['audio_filepath'] = filename - manifest.append("%s\n" % json.dumps(json_data)) + results.append("%s\n" % json.dumps(json_data)) with open(out_manifest_path, 'w') as out_manifest: - out_manifest.writelines(manifest) + out_manifest.writelines(results) out_manifest.close() out_tar.close() diff --git a/cloud/pcloud_split_data.py b/cloud/pcloud_split_data.py new file mode 100644 index 000000000..8f98799aa --- /dev/null +++ b/cloud/pcloud_split_data.py @@ -0,0 +1,50 @@ +""" +This tool is used for splitting data into each node of +paddle cloud by total trainer count and current trainer id. +The meaning of trainer is a instance of k8s cluster. +This script should be called in paddle cloud. +""" +import os +import json +import argparse + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--in_manifest_path", + default='./cloud/data/dev.mani', + type=str, + help="Input manifest path. (default: %(default)s)") +parser.add_argument( + "--data_tar_path", + default='./cloud/data/dev.tar', + type=str, + help="Data tar file path. (default: %(default)s)") +parser.add_argument( + "--out_manifest_path", + default='./cloud/data/dev.mani.split', + type=str, + help="Out manifest file path. (default: %(default)s)") +args = parser.parse_args() + + +def split_data(in_manifest, tar_path, out_manifest): + with open("/trainer_id", "r") as f: + trainer_id = int(f.readline()[:-1]) + with open("/trainer_count", "r") as f: + trainer_count = int(f.readline()[:-1]) + + tar_path = os.path.abspath(tar_path) + result = [] + for index, json_line in enumerate(open(in_manifest)): + if (index % trainer_count) == trainer_id: + json_data = json.loads(json_line) + json_data['audio_filepath'] = "tar:%s#%s" % ( + tar_path, json_data['audio_filepath']) + result.append("%s\n" % json.dumps(json_data)) + with open(out_manifest, 'w') as manifest: + manifest.writelines(result) + + +if __name__ == '__main__': + split_data(args.in_manifest_path, args.data_tar_path, + args.out_manifest_path) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh new file mode 100644 index 000000000..5d0535011 --- /dev/null +++ b/cloud/pcloud_submit.sh @@ -0,0 +1,17 @@ +DS2_PATH=../ +tar -czf deepspeech.tar.gz ${DS2_PATH} +JOB_NAME=deepspeech`date +%Y%m%d%H%M%S` +cp pcloud_train.sh ${DS2_PATH} +paddlecloud submit \ +-image wanghaoshuang/pcloud_ds2:latest-gpu-cudnn \ +-jobname ${JOB_NAME} \ +-cpu 4 \ +-gpu 4 \ +-memory 10Gi \ +-parallelism 1 \ +-pscpu 1 \ +-pservers 1 \ +-psmemory 10Gi \ +-passes 1 \ +-entry "sh pcloud_train.sh" \ +. 
diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh new file mode 100644 index 000000000..385281cef --- /dev/null +++ b/cloud/pcloud_train.sh @@ -0,0 +1,37 @@ +DATA_PATH=/pfs/dlnel/public/dataset/speech/libri +#setted by user +TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train +#setted by user +DEV_MANI=${DATA_PATH}/manifest_pcloud.dev +#setted by user +TRAIN_TAR=${DATA_PATH}/data.train.tar +#setted by user +DEV_TAR=${DATA_PATH}/data.dev.tar +#setted by user +VOCAB_PATH=${DATA_PATH}/eng_vocab.txt +#setted by user +MEAN_STD_FILE=${DATA_PATH}/mean_std.npz + +tar -xzf deepspeech.tar.gz +rm -rf ./cloud/data/* + +# split train data for each pcloud node +python ./cloud/pcloud_split_data.py \ +--in_manifest_path=$TRAIN_MANI \ +--data_tar_path=$TRAIN_TAR \ +--out_manifest_path='./cloud/data/train.mani' + +# split dev data for each pcloud node +python pcloud_split_data.py \ +--in_manifest_path=$DEV_MANI \ +--data_tar_path=$DEV_TAR \ +--out_manifest_path='./cloud/data/dev.mani' + +python train.py \ +--use_gpu=1 \ +--trainer_count=4 \ +--batch_size=256 \ +--mean_std_filepath=$MEAN_STD_FILE \ +--train_manifest_path='./cloud/data/train.mani' \ +--dev_manifest_path='./cloud/data/dev.mani' \ +--vocab_filepath=$VOCAB_PATH \ diff --git a/data_utils/data.py b/data_utils/data.py index e1fa47477..5a5fa51b2 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -6,11 +6,11 @@ from __future__ import division from __future__ import print_function import random -import numpy as np +import tarfile import multiprocessing -from threading import local +import numpy as np import paddle.v2 as paddle -import tarfile +from threading import local from data_utils import utils from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.speech_featurizer import SpeechFeaturizer @@ -52,6 +52,9 @@ class DataGenerator(object): :types max_freq: None|float :param specgram_type: Specgram feature type. Options: 'linear'. :type specgram_type: str + :param use_dB_normalization: Whether to normalize the audio to -20 dB + before extracting the features. + :type use_dB_normalization: bool :param num_threads: Number of CPU threads for processing data. :type num_threads: int :param random_seed: Random seed. 
@@ -68,6 +71,7 @@ class DataGenerator(object): window_ms=20.0, max_freq=None, specgram_type='linear', + use_dB_normalization=True, num_threads=multiprocessing.cpu_count(), random_seed=0): self._max_duration = max_duration @@ -80,7 +84,8 @@ class DataGenerator(object): specgram_type=specgram_type, stride_ms=stride_ms, window_ms=window_ms, - max_freq=max_freq) + max_freq=max_freq, + use_dB_normalization=use_dB_normalization) self._num_threads = num_threads self._rng = random.Random(random_seed) self._epoch = 0 diff --git a/pcloud_split_data.py b/pcloud_split_data.py deleted file mode 100644 index bf35383af..000000000 --- a/pcloud_split_data.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import json -import argparse - - -def split_data(inManifest, tar_path, outManifest): - trainer_id = 1 - trainer_count = 2 - #with open("/trainer_id", "r") as f: - # trainer_id = int(f.readline()[:-1]) - #with open("/trainer_count", "r") as f: - # trainer_count = int(f.readline()[:-1]) - - tarPath = os.path.abspath(tar_path) - result = [] - for index, json_line in enumerate(open(inManifest)): - if (index % trainer_count) == trainer_id: - json_data = json.loads(json_line) - json_data['audio_filepath'] = "tar:%s#%s" % ( - tarPath, json_data['audio_filepath']) - result.append("%s\n" % json.dumps(json_data)) - with open(outManifest, 'w') as manifest: - manifest.writelines(result) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description=__doc__) - - parser.add_argument( - "--in_manifest_path", - default='datasets/dev.mani', - type=str, - help="Input manifest path. (default: %(default)s)") - parser.add_argument( - "--data_tar_path", - default='datasets/dev.tar', - type=str, - help="Data tar file path. (default: %(default)s)") - parser.add_argument( - "--out_manifest_path", - default='datasets/dev.mani.split', - type=str, - help="Out manifest file path. 
(default: %(default)s)") - args = parser.parse_args() - - split_data(args.in_manifest_path, args.data_tar_path, - args.out_manifest_path) diff --git a/pcloud_submit.sh b/pcloud_submit.sh deleted file mode 100644 index 06e65110d..000000000 --- a/pcloud_submit.sh +++ /dev/null @@ -1,13 +0,0 @@ -paddlecloud submit \ --image wanghaoshuang/pcloud_ds2 \ --jobname ds23 \ --cpu 1 \ --gpu 0 \ --memory 10Gi \ --parallelism 1 \ --pscpu 1 \ --pservers 1 \ --psmemory 10Gi \ --passes 1 \ --entry "sh pcloud_train.sh" \ -./deep_speech_2 diff --git a/pcloud_train.sh b/pcloud_train.sh index fb6cbb9ec..b13e23e95 100644 --- a/pcloud_train.sh +++ b/pcloud_train.sh @@ -1,32 +1,37 @@ +DATA_PATH=/pfs/dlnel/public/dataset/speech/libri #setted by user -TRAIN_MANI='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.mani' +TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train #setted by user -DEV_MANI='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.mani' +DEV_MANI=${DATA_PATH}/manifest_pcloud.dev #setted by user -TRAIN_TAR='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.tar' +TRAIN_TAR=${DATA_PATH}/data.train.tar #setted by user -DEV_TAR='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.tar' +DEV_TAR=${DATA_PATH}/data.dev.tar #setted by user -VOCAB_PATH='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/eng_vocab.txt' +VOCAB_PATH=${DATA_PATH}/eng_vocab.txt #setted by user -MEAN_STD_FILE='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/mean_std.npz' +MEAN_STD_FILE=${DATA_PATH}/mean_std.npz + +tar -xzvf deepspeech.tar.gz +rm -rf ./cloud/data/* # split train data for each pcloud node -python pcloud_split_data.py \ +python ./cloud/pcloud_split_data.py \ --in_manifest_path=$TRAIN_MANI \ --data_tar_path=$TRAIN_TAR \ ---out_manifest_path='./train.mani' +--out_manifest_path='./cloud/data/train.mani' + # split dev data for each pcloud node python pcloud_split_data.py \ --in_manifest_path=$DEV_MANI \ --data_tar_path=$DEV_TAR \ ---out_manifest_path='./dev.mani' +--out_manifest_path='./cloud/data/dev.mani' python train.py \ ---use_gpu=0 \ +--use_gpu=1 \ --trainer_count=4 \ ---batch_size=2 \ +--batch_size=256 \ --mean_std_filepath=$MEAN_STD_FILE \ ---train_manifest_path='./train.mani' \ ---dev_manifest_path='./dev.mani' \ +--train_manifest_path='./cloud/data/train.mani' \ +--dev_manifest_path='./cloud/data/dev.mani' \ --vocab_filepath=$VOCAB_PATH \ From ef5f0436f2d0f373820921a952e3bf517b340ad1 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 27 Jul 2017 14:13:14 +0800 Subject: [PATCH 093/335] Clean warning logs in cloud/README.md --- cloud/README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cloud/README.md b/cloud/README.md index 91a1d52a7..e7855ba82 100644 --- a/cloud/README.md +++ b/cloud/README.md @@ -26,15 +26,11 @@ label selector: paddle-job-pserver=deepspeech20170727130129, desired: 1 running pod list: [('Running', '10.1.3.6')] label selector: paddle-job=deepspeech20170727130129, desired: 1 running pod list: [('Running', '10.1.83.14')] -Starting training job: /pfs/dlnel/home/yanxu05@baidu.com/jobs/deepspeech20170727130129, num_gradient_servers: 1, trainer_id: 0, version: v2 +Starting training job: /pfs/dlnel/home/****@baidu.com/jobs/deepspeech20170727130129, num_gradient_servers: 1, trainer_id: 0, version: v2 I0727 05:01:42.969719 25 Util.cpp:166] commandline: --num_gradient_servers=1 --ports_num_for_sparse=1 --use_gpu=1 --trainer_id=0 --pservers=10.1.3.6 --trainer_count=4 --num_passes=1 
--ports_num=1 --port=7164 [INFO 2017-07-27 05:01:50,279 layers.py:2430] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968 [WARNING 2017-07-27 05:01:50,280 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better [INFO 2017-07-27 05:01:50,283 layers.py:2430] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848 -[WARNING 2017-07-27 05:01:50,283 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better -[WARNING 2017-07-27 05:01:50,287 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better -[WARNING 2017-07-27 05:01:50,291 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better -[WARNING 2017-07-27 05:01:50,295 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better I0727 05:01:50.316176 25 MultiGradientMachine.cpp:99] numLogicalDevices=1 numThreads=4 numDevices=4 I0727 05:01:50.454787 25 GradientMachine.cpp:85] Initing parameters.. I0727 05:01:50.690007 25 GradientMachine.cpp:92] Init parameters done. From 7d7984b8fb67854c58d1106dc8038a864b56d7a1 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 27 Jul 2017 15:40:08 +0800 Subject: [PATCH 094/335] add test for libsndfile installation --- tests/test_setup.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tests/test_setup.py diff --git a/tests/test_setup.py b/tests/test_setup.py new file mode 100644 index 000000000..bd6fabb0a --- /dev/null +++ b/tests/test_setup.py @@ -0,0 +1,12 @@ +"""Test Setup.""" +import unittest + + +class TestSetup(unittest.TestCase): + # test the installation of libsndfile library + def test_soundfile(self): + import soundfile + + +if __name__ == '__main__': + unittest.main() From b72b70e54c2b5a0e80a48926e437a51dc44a9256 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 28 Jul 2017 16:19:44 +0800 Subject: [PATCH 095/335] add soundfile read/write unitest --- .gitignore | 1 - setup.sh | 24 +++++++++++++----------- tests/test_setup.py | 15 +++++++++++++-- 3 files changed, 26 insertions(+), 14 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 0e0f559f1..000000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -thirdparty diff --git a/setup.sh b/setup.sh index 854f879e9..4d451a6f1 100644 --- a/setup.sh +++ b/setup.sh @@ -10,19 +10,21 @@ if [ $? != 0 ]; then fi # install package libsndfile -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -mkdir thirdparty -curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" +python -c "import soundfile" if [ $? != 0 ]; then - echo "Download libsndfile-1.0.28.tar.gz failed !!!" - exit 1 + echo "Install package libsndfile into default system path." + curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" + if [ $? != 0 ]; then + echo "Download libsndfile-1.0.28.tar.gz failed !!!" + exit 1 + fi + tar -zxvf libsndfile-1.0.28.tar.gz + cd libsndfile-1.0.28 + ./configure && make && make install + cd .. + rm -rf libsndfile-1.0.28 + rm libsndfile-1.0.28.tar.gz fi -tar -zxvf libsndfile-1.0.28.tar.gz -cd libsndfile-1.0.28 -./configure --prefix=$DIR/thirdparty/libsndfile && make && make install -cd .. 
-rm -rf libsndfile-1.0.28 -rm libsndfile-1.0.28.tar.gz # prepare ./checkpoints mkdir checkpoints diff --git a/tests/test_setup.py b/tests/test_setup.py index bd6fabb0a..71a46afb7 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -1,11 +1,22 @@ """Test Setup.""" import unittest +import numpy as np +import os class TestSetup(unittest.TestCase): - # test the installation of libsndfile library def test_soundfile(self): - import soundfile + import soundfile as sf + # floating point data is typically limited to the interval [-1.0, 1.0], + # but smaller/larger values are supported as well + data = np.array([[1.75, -1.75], [1.0, -1.0], [0.5, -0.5], + [0.25, -0.25]]) + file = 'test.wav' + sf.write(file, data, 44100, format='WAV', subtype='FLOAT') + read, fs = sf.read(file) + assert np.all(read == data) + assert fs == 44100 + os.remove(file) if __name__ == '__main__': From de212572ed8e9d0167e643a52f5556ff6b97dba1 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 28 Jul 2017 18:10:18 +0800 Subject: [PATCH 096/335] update unittest with comments --- tests/test_setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_setup.py b/tests/test_setup.py index 71a46afb7..18b9c1a0c 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -14,8 +14,8 @@ class TestSetup(unittest.TestCase): file = 'test.wav' sf.write(file, data, 44100, format='WAV', subtype='FLOAT') read, fs = sf.read(file) - assert np.all(read == data) - assert fs == 44100 + self.assertTrue(np.all(read == data)) + self.assertEqual(fs, 44100) os.remove(file) From a840f85423ffb51f8360496fd7d12e92dd737dbe Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 27 Jul 2017 10:02:54 +0800 Subject: [PATCH 097/335] change probs' computation into log scale & add best path decoder --- deploy/__init__.py | 0 deploy/ctc_beam_search_decoder.cpp | 189 ++++++++++++++++++++++------- deploy/ctc_beam_search_decoder.h | 4 + deploy/scorer.cpp | 9 +- deploy/scorer.h | 2 +- deploy/swig_decoder.py | 22 ++++ 6 files changed, 180 insertions(+), 46 deletions(-) create mode 100644 deploy/__init__.py create mode 100644 deploy/swig_decoder.py diff --git a/deploy/__init__.py b/deploy/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/deploy/ctc_beam_search_decoder.cpp b/deploy/ctc_beam_search_decoder.cpp index a684b30a6..af6414a97 100644 --- a/deploy/ctc_beam_search_decoder.cpp +++ b/deploy/ctc_beam_search_decoder.cpp @@ -3,8 +3,11 @@ #include #include #include +#include #include "ctc_beam_search_decoder.h" +typedef float log_prob_type; + template bool pair_comp_first_rev(const std::pair a, const std::pair b) { @@ -17,6 +20,65 @@ bool pair_comp_second_rev(const std::pair a, const std::pair b) return a.second > b.second; } +template +T log_sum_exp(T x, T y) +{ + static T num_min = -std::numeric_limits::max(); + if (x <= -num_min) return y; + if (y <= -num_min) return x; + T xmax = std::max(x, y); + return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; +} + +std::string ctc_best_path_decoder(std::vector > probs_seq, + std::vector vocabulary) { + // dimension check + int num_time_steps = probs_seq.size(); + for (int i=0; i max_idx_vec; + double max_prob = 0.0; + int max_idx = 0; + for (int i=0; i idx_vec; + for (int i=0; i0) && max_idx_vec[i]!=max_idx_vec[i-1])) { + std::cout< > ctc_beam_search_decoder(std::vector > probs_seq, int beam_size, @@ -52,106 +114,147 @@ std::vector > // initialize // two sets containing selected and candidate prefixes respectively - std::map prefix_set_prev, prefix_set_next; 
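For readers following this decoder change, the key numerical idea is the switch from multiplying raw probabilities to adding log-probabilities combined with a stable log-sum-exp. Below is a minimal Python sketch, not part of the repository and using made-up prefix scores, of what the new C++ `log_sum_exp` does and why the log domain matters:

```
import math

def log_sum_exp(x, y):
    """Return log(exp(x) + exp(y)), factoring out the max to stay stable."""
    if x == -float('inf'):
        return y
    if y == -float('inf'):
        return x
    m = max(x, y)
    return math.log(math.exp(x - m) + math.exp(y - m)) + m

# A prefix score is the product of many per-frame probabilities, so the
# linear value underflows long before its log-domain counterpart does.
log_blank, log_non_blank = -800.0, -801.0   # hypothetical prefix log-probs
print(math.exp(log_blank) + math.exp(log_non_blank))   # 0.0, both underflow
print(log_sum_exp(log_blank, log_non_blank))           # about -799.69
```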
+ std::map prefix_set_prev, prefix_set_next; // probability of prefixes ending with blank and non-blank - std::map probs_b_prev, probs_nb_prev; - std::map probs_b_cur, probs_nb_cur; - prefix_set_prev["\t"] = 1.0; - probs_b_prev["\t"] = 1.0; - probs_nb_prev["\t"] = 0.0; + std::map log_probs_b_prev, log_probs_nb_prev; + std::map log_probs_b_cur, log_probs_nb_cur; + + static log_prob_type NUM_MAX = std::numeric_limits::max(); + prefix_set_prev["\t"] = 0.0; + log_probs_b_prev["\t"] = 0.0; + log_probs_nb_prev["\t"] = -NUM_MAX; for (int time_step=0; time_step prob = probs_seq[time_step]; std::vector > prob_idx; for (int i=0; i(i, prob[i])); } + // pruning of vacobulary + int cutoff_len = prob.size(); if (cutoff_prob < 1.0) { - std::sort(prob_idx.begin(), prob_idx.end(), + std::sort(prob_idx.begin(), + prob_idx.end(), pair_comp_second_rev); - float cum_prob = 0.0; - int cutoff_len = 0; + double cum_prob = 0.0; + cutoff_len = 0; for (int i=0; i= cutoff_prob) break; } prob_idx = std::vector >( prob_idx.begin(), - prob_idx.begin() + cutoff_len); + prob_idx.begin() + cutoff_len); } + + std::vector > log_prob_idx; + for (int i=0; i + (prob_idx[i].first, log(prob_idx[i].second))); + } + // extend prefix - for (std::map::iterator it = prefix_set_prev.begin(); + for (std::map::iterator + it = prefix_set_prev.begin(); it != prefix_set_prev.end(); it++) { std::string l = it->first; if( prefix_set_next.find(l) == prefix_set_next.end()) { - probs_b_cur[l] = probs_nb_cur[l] = 0.0; + log_probs_b_cur[l] = log_probs_nb_cur[l] = -NUM_MAX; } - for (int index=0; index 1) { - score = ext_scorer->get_score(l.substr(1)); + score = ext_scorer->get_score(l.substr(1), true); } - probs_nb_cur[l_plus] += score * prob_c * ( - probs_b_prev[l] + probs_nb_prev[l]); + log_probs_prev = log_sum_exp(log_probs_b_prev[l], + log_probs_nb_prev[l]); + log_probs_nb_cur[l_plus] = log_sum_exp( + log_probs_nb_cur[l_plus], + score + log_prob_c + log_probs_prev + ); } else { - probs_nb_cur[l_plus] += prob_c * ( - probs_b_prev[l] + probs_nb_prev[l]); + log_probs_prev = log_sum_exp(log_probs_b_prev[l], + log_probs_nb_prev[l]); + log_probs_nb_cur[l_plus] = log_sum_exp( + log_probs_nb_cur[l_plus], + log_prob_c+log_probs_prev + ); } - prefix_set_next[l_plus] = probs_nb_cur[l_plus] + probs_b_cur[l_plus]; + prefix_set_next[l_plus] = log_sum_exp( + log_probs_nb_cur[l_plus], + log_probs_b_cur[l_plus] + ); } } - prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l]; + prefix_set_next[l] = log_sum_exp(log_probs_b_cur[l], + log_probs_nb_cur[l]); } - probs_b_prev = probs_b_cur; - probs_nb_prev = probs_nb_cur; - std::vector > + log_probs_b_prev = log_probs_b_cur; + log_probs_nb_prev = log_probs_nb_cur; + std::vector > prefix_vec_next(prefix_set_next.begin(), prefix_set_next.end()); std::sort(prefix_vec_next.begin(), prefix_vec_next.end(), - pair_comp_second_rev); - int k = beam_size - (prefix_vec_next.begin(), prefix_vec_next.begin()+k); + pair_comp_second_rev); + int num_prefixes_next = prefix_vec_next.size(); + int k = beam_size ( + prefix_vec_next.begin(), + prefix_vec_next.begin() + k + ); } // post processing std::vector > beam_result; - for (std::map::iterator it = prefix_set_prev.begin(); - it != prefix_set_prev.end(); it++) { - if (it->second > 0.0 && it->first.size() > 1) { - double prob = it->second; + for (std::map::iterator + it = prefix_set_prev.begin(); it != prefix_set_prev.end(); it++) { + if (it->second > -NUM_MAX && it->first.size() > 1) { + log_prob_type log_prob = it->second; std::string sentence = it->first.substr(1); // scoring the 
last word if (ext_scorer != NULL && sentence[sentence.size()-1] != ' ') { - prob = prob * ext_scorer->get_score(sentence); + log_prob = log_prob + ext_scorer->get_score(sentence, true); + } + if (log_prob > -NUM_MAX) { + std::pair cur_result(log_prob, sentence); + beam_result.push_back(cur_result); } - double log_prob = log(prob); - beam_result.push_back(std::pair(log_prob, sentence)); } } // sort the result and return diff --git a/deploy/ctc_beam_search_decoder.h b/deploy/ctc_beam_search_decoder.h index a4bb6aa74..de7e7791d 100644 --- a/deploy/ctc_beam_search_decoder.h +++ b/deploy/ctc_beam_search_decoder.h @@ -31,5 +31,9 @@ std::vector > Scorer *ext_scorer=NULL, bool nproc=false ); +/* CTC Best Path Decoder + */ +std::string ctc_best_path_decoder(std::vector > probs_seq, + std::vector vocabulary); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index d438ec1bd..e9a74b989 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -89,10 +89,15 @@ void Scorer::reset_params(float alpha, float beta) { this->_beta = beta; } -double Scorer::get_score(std::string sentence) { +double Scorer::get_score(std::string sentence, bool log) { double lm_score = language_model_score(sentence); int word_cnt = word_count(sentence); - double final_score = pow(10, _alpha*lm_score) * pow(word_cnt, _beta); + double final_score = 0.0; + if (log == false) { + final_score = pow(10, _alpha*lm_score) * pow(word_cnt, _beta); + } else { + final_score = _alpha*lm_score*std::log(10) + _beta*std::log(word_cnt); + } return final_score; } diff --git a/deploy/scorer.h b/deploy/scorer.h index 7b305772c..a18e119bc 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -30,7 +30,7 @@ public: // reset params alpha & beta void reset_params(float alpha, float beta); // get the final score - double get_score(std::string); + double get_score(std::string, bool log=false); }; #endif //SCORER_H_ diff --git a/deploy/swig_decoder.py b/deploy/swig_decoder.py new file mode 100644 index 000000000..fed23c9ef --- /dev/null +++ b/deploy/swig_decoder.py @@ -0,0 +1,22 @@ +"""Contains various CTC decoders in SWIG.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from swig_ctc_beam_search_decoder import ctc_beam_search_decoder as beam_search_decoder +from swig_ctc_beam_search_decoder import ctc_best_path_decoder as best_path__decoder + + +def ctc_best_path_decoder(probs_seq, vocabulary): + best_path__decoder(probs_seq.to_list(), vocabulary) + + +def ctc_beam_search_decoder( + probs_seq, + beam_size, + vocabulary, + blank_id, + cutoff_prob=1.0, + ext_scoring_func=None, ): + beam_search_decoder(probs_seq.to_list(), beam_size, vocabulary, blank_id, + cutoff_prob, ext_scoring_func) From 92eacf548bf5ca278a2ad741dd9c901ca6d23a8f Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 31 Jul 2017 21:57:07 +0800 Subject: [PATCH 098/335] Update default config params and result display for evaluator.py and infer.py for DS2. 
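A quick way to see that the scorer change above is consistent: the new log-mode score is simply the natural logarithm of the old linear score. A throwaway Python check with made-up values for `alpha`, `beta`, the log10 LM score, and the word count:

```
import math

alpha, beta = 0.36, 0.25          # hypothetical LM and word-count weights
lm_score, word_cnt = -4.2, 7      # hypothetical log10 LM score and word count

linear_score = (10 ** (alpha * lm_score)) * (word_cnt ** beta)
log_score = alpha * lm_score * math.log(10) + beta * math.log(word_cnt)

print(math.log(linear_score))     # matches log_score up to float rounding
print(log_score)
```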
--- evaluate.py | 26 ++++++++++++++++++-------- infer.py | 9 +++++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/evaluate.py b/evaluate.py index 19eabf4e5..1d758687b 100644 --- a/evaluate.py +++ b/evaluate.py @@ -4,6 +4,7 @@ from __future__ import division from __future__ import print_function import distutils.util +import sys import argparse import gzip import paddle.v2 as paddle @@ -12,13 +13,19 @@ from model import deep_speech2 from decoder import * from lm.lm_scorer import LmScorer from error_rate import wer +import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--batch_size", - default=100, + default=128, type=int, help="Minibatch size for evaluation. (default: %(default)s)") +parser.add_argument( + "--trainer_count", + default=8, + type=int, + help="Trainer number. (default: %(default)s)") parser.add_argument( "--num_conv_layers", default=2, @@ -58,8 +65,8 @@ parser.add_argument( "--decode_method", default='beam_search', type=str, - help="Method for ctc decoding, best_path or beam_search. (default: %(default)s)" -) + help="Method for ctc decoding, best_path or beam_search. " + "(default: %(default)s)") parser.add_argument( "--language_model_path", default="lm/data/common_crawl_00.prune01111.trie.klm", @@ -67,12 +74,12 @@ parser.add_argument( help="Path for language model. (default: %(default)s)") parser.add_argument( "--alpha", - default=0.26, + default=0.36, type=float, help="Parameter associated with language model. (default: %(default)f)") parser.add_argument( "--beta", - default=0.1, + default=0.25, type=float, help="Parameter associated with word count. (default: %(default)f)") parser.add_argument( @@ -191,7 +198,7 @@ def evaluate(): blank_id=len(data_generator.vocab_list), num_processes=args.num_processes_beam_search, ext_scoring_func=ext_scorer, - cutoff_prob=args.cutoff_prob, ) + cutoff_prob=args.cutoff_prob) for i, beam_search_result in enumerate(beam_search_results): wer_sum += wer(target_transcription[i], beam_search_result[0][1]) @@ -199,12 +206,15 @@ def evaluate(): else: raise ValueError("Decoding method [%s] is not supported." % decode_method) + print("WER (%d/?) = %f" % (wer_counter, wer_sum / wer_counter)) - print("Final WER = %f" % (wer_sum / wer_counter)) + print("Final WER (%d/%d) = %f" % (wer_counter, wer_counter, + wer_sum / wer_counter)) def main(): - paddle.init(use_gpu=args.use_gpu, trainer_count=1) + utils.print_arguments(args) + paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) evaluate() diff --git a/infer.py b/infer.py index 817526302..ad3fdc4d7 100644 --- a/infer.py +++ b/infer.py @@ -57,6 +57,11 @@ parser.add_argument( type=str, help="Feature type of audio data: 'linear' (power spectrum)" " or 'mfcc'. (default: %(default)s)") +parser.add_argument( + "--trainer_count", + default=8, + type=int, + help="Trainer number. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -208,7 +213,7 @@ def infer(): wer_cur = wer(target_transcription[i], beam_search_result[0][1]) wer_sum += wer_cur wer_counter += 1 - print("cur wer = %f , average wer = %f" % + print("Current WER = %f , Average WER = %f" % (wer_cur, wer_sum / wer_counter)) else: raise ValueError("Decoding method [%s] is not supported." 
% @@ -217,7 +222,7 @@ def infer(): def main(): utils.print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=1) + paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) infer() From e2f954f5e258957ef0cd8145da9c36d94a543a28 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 1 Aug 2017 15:27:41 +0800 Subject: [PATCH 099/335] make kenlm install more robust --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3f73ea8b8..eb6022599 100755 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ scipy==0.13.1 resampy==0.1.5 SoundFile==0.9.0.post1 python_speech_features -https://github.com/kpu/kenlm/archive/master.zip +https://github.com/luotao1/kenlm/archive/master.zip From 8122dd9c2999ac451e5a02e22f67d1ba09bfb51c Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 1 Aug 2017 16:21:46 +0800 Subject: [PATCH 100/335] Simplify train.py, evaluate.py, infer.py and tune.py by adding DeepSpeech2Model class. --- evaluate.py | 107 ++++++--------------- infer.py | 106 +++++---------------- layer.py | 155 ++++++++++++++++++++++++++++++ model.py | 265 +++++++++++++++++++++++++++------------------------- train.py | 121 +++++++----------------- tune.py | 102 ++++++++------------ 6 files changed, 415 insertions(+), 441 deletions(-) create mode 100644 layer.py diff --git a/evaluate.py b/evaluate.py index 1d758687b..fb7211fc2 100644 --- a/evaluate.py +++ b/evaluate.py @@ -4,14 +4,11 @@ from __future__ import division from __future__ import print_function import distutils.util -import sys import argparse -import gzip +import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator -from model import deep_speech2 -from decoder import * -from lm.lm_scorer import LmScorer +from model import DeepSpeech2Model from error_rate import wer import utils @@ -119,37 +116,12 @@ args = parser.parse_args() def evaluate(): """Evaluate on whole test data for DeepSpeech2.""" - # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_threads_data) - - # create network config - # paddle.data_type.dense_array is used for variable batch input. - # The size 161 * 161 is only an placeholder value and the real shape - # of input batch data will be induced during training. 
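The refactored evaluation loop below reports an averaged `wer()` over the test set. For reference, word error rate is a word-level edit distance normalized by the reference length; the following is a minimal standalone sketch for illustration, not the project's `error_rate` module:

```
def wer(reference, hypothesis):
    """Word error rate: word-level edit distance divided by reference length."""
    ref, hyp = reference.split(), hypothesis.split()
    # dp[i][j] = edit distance between ref[:i] and hyp[:j]
    dp = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        dp[i][0] = i
    for j in range(len(hyp) + 1):
        dp[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                           dp[i][j - 1] + 1,         # insertion
                           dp[i - 1][j - 1] + cost)  # substitution
    return float(dp[len(ref)][len(hyp)]) / len(ref)

print(wer("the cat sat on the mat", "the cat sat on mat"))  # 1 deletion / 6 words
```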
- audio_data = paddle.layer.data( - name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) - text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) - output_probs = deep_speech2( - audio_data=audio_data, - text_data=text_data, - dict_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - is_inference=True) - - # load parameters - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(args.model_filepath)) - - # prepare infer data batch_reader = data_generator.batch_reader_creator( manifest_path=args.decode_manifest_path, batch_size=args.batch_size, @@ -157,59 +129,34 @@ def evaluate(): sortagrad=False, shuffle_method=None) - # define inferer - inferer = paddle.inference.Inference( - output_layer=output_probs, parameters=parameters) - - # initialize external scorer for beam search decoding - if args.decode_method == 'beam_search': - ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) + ds2_model = DeepSpeech2Model( + vocab_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_layer_size=args.rnn_layer_size, + pretrained_model_path=args.model_filepath) - wer_counter, wer_sum = 0, 0.0 + wer_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): - # run inference - infer_results = inferer.infer(input=infer_data) - num_steps = len(infer_results) // len(infer_data) - probs_split = [ - infer_results[i * num_steps:(i + 1) * num_steps] - for i in xrange(0, len(infer_data)) + result_transcripts = ds2_model.infer_batch( + infer_data=infer_data, + decode_method=args.decode_method, + beam_alpha=args.alpha, + beam_beta=args.beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + vocab_list=data_generator.vocab_list, + language_model_path=args.language_model_path, + num_processes=args.num_processes_beam_search) + target_transcripts = [ + ''.join([data_generator.vocab_list[token] for token in transcript]) + for _, transcript in infer_data ] - # target transcription - target_transcription = [ - ''.join([ - data_generator.vocab_list[index] for index in infer_data[i][1] - ]) for i, probs in enumerate(probs_split) - ] - # decode and print - # best path decode - if args.decode_method == "best_path": - for i, probs in enumerate(probs_split): - output_transcription = ctc_best_path_decoder( - probs_seq=probs, vocabulary=data_generator.vocab_list) - wer_sum += wer(target_transcription[i], output_transcription) - wer_counter += 1 - # beam search decode - elif args.decode_method == "beam_search": - # beam search using multiple processes - beam_search_results = ctc_beam_search_decoder_batch( - probs_split=probs_split, - vocabulary=data_generator.vocab_list, - beam_size=args.beam_size, - blank_id=len(data_generator.vocab_list), - num_processes=args.num_processes_beam_search, - ext_scoring_func=ext_scorer, - cutoff_prob=args.cutoff_prob) - for i, beam_search_result in enumerate(beam_search_results): - wer_sum += wer(target_transcription[i], - beam_search_result[0][1]) - wer_counter += 1 - else: - raise ValueError("Decoding method [%s] is not supported." % - decode_method) - print("WER (%d/?) 
= %f" % (wer_counter, wer_sum / wer_counter)) - - print("Final WER (%d/%d) = %f" % (wer_counter, wer_counter, - wer_sum / wer_counter)) + for target, result in zip(target_transcripts, result_transcripts): + wer_sum += wer(target, result) + num_ins += 1 + print("WER (%d/?) = %f" % (num_ins, wer_sum / num_ins)) + print("Final WER (%d/%d) = %f" % (num_ins, num_ins, wer_sum / num_ins)) def main(): diff --git a/infer.py b/infer.py index ad3fdc4d7..ec65cc748 100644 --- a/infer.py +++ b/infer.py @@ -4,14 +4,11 @@ from __future__ import division from __future__ import print_function import argparse -import gzip import distutils.util import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator -from model import deep_speech2 -from decoder import * -from lm.lm_scorer import LmScorer +from model import DeepSpeech2Model from error_rate import wer import utils @@ -124,37 +121,12 @@ args = parser.parse_args() def infer(): """Inference for DeepSpeech2.""" - # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_threads_data) - - # create network config - # paddle.data_type.dense_array is used for variable batch input. - # The size 161 * 161 is only an placeholder value and the real shape - # of input batch data will be induced during training. - audio_data = paddle.layer.data( - name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) - text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) - output_probs = deep_speech2( - audio_data=audio_data, - text_data=text_data, - dict_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - is_inference=True) - - # load parameters - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(args.model_filepath)) - - # prepare infer data batch_reader = data_generator.batch_reader_creator( manifest_path=args.decode_manifest_path, batch_size=args.num_samples, @@ -163,61 +135,31 @@ def infer(): shuffle_method=None) infer_data = batch_reader().next() - # run inference - infer_results = paddle.infer( - output_layer=output_probs, parameters=parameters, input=infer_data) - num_steps = len(infer_results) // len(infer_data) - probs_split = [ - infer_results[i * num_steps:(i + 1) * num_steps] - for i in xrange(len(infer_data)) - ] + ds2_model = DeepSpeech2Model( + vocab_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_layer_size=args.rnn_layer_size, + pretrained_model_path=args.model_filepath) + result_transcripts = ds2_model.infer_batch( + infer_data=infer_data, + decode_method=args.decode_method, + beam_alpha=args.alpha, + beam_beta=args.beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + vocab_list=data_generator.vocab_list, + language_model_path=args.language_model_path, + num_processes=args.num_processes_beam_search) - # targe transcription - target_transcription = [ - ''.join( - [data_generator.vocab_list[index] for index in infer_data[i][1]]) - for i, probs in enumerate(probs_split) + target_transcripts = [ + ''.join([data_generator.vocab_list[token] for token in transcript]) + for _, transcript in infer_data ] - - ## decode and print - # best path decode - wer_sum, wer_counter = 0, 0 - if args.decode_method == 
"best_path": - for i, probs in enumerate(probs_split): - best_path_transcription = ctc_best_path_decoder( - probs_seq=probs, vocabulary=data_generator.vocab_list) - print("\nTarget Transcription: %s\nOutput Transcription: %s" % - (target_transcription[i], best_path_transcription)) - wer_cur = wer(target_transcription[i], best_path_transcription) - wer_sum += wer_cur - wer_counter += 1 - print("cur wer = %f, average wer = %f" % - (wer_cur, wer_sum / wer_counter)) - # beam search decode - elif args.decode_method == "beam_search": - ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) - beam_search_batch_results = ctc_beam_search_decoder_batch( - probs_split=probs_split, - vocabulary=data_generator.vocab_list, - beam_size=args.beam_size, - blank_id=len(data_generator.vocab_list), - num_processes=args.num_processes_beam_search, - cutoff_prob=args.cutoff_prob, - ext_scoring_func=ext_scorer, ) - for i, beam_search_result in enumerate(beam_search_batch_results): - print("\nTarget Transcription:\t%s" % target_transcription[i]) - for index in xrange(args.num_results_per_sample): - result = beam_search_result[index] - #output: index, log prob, beam result - print("Beam %d: %f \t%s" % (index, result[0], result[1])) - wer_cur = wer(target_transcription[i], beam_search_result[0][1]) - wer_sum += wer_cur - wer_counter += 1 - print("Current WER = %f , Average WER = %f" % - (wer_cur, wer_sum / wer_counter)) - else: - raise ValueError("Decoding method [%s] is not supported." % - decode_method) + for target, result in zip(target_transcripts, result_transcripts): + print("\nTarget Transcription: %s\nOutput Transcription: %s" % + (target, result)) + print("Current wer = %f" % wer(target, result)) def main(): diff --git a/layer.py b/layer.py new file mode 100644 index 000000000..7b0273389 --- /dev/null +++ b/layer.py @@ -0,0 +1,155 @@ +"""Contains DeepSpeech2 layers.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.v2 as paddle + +DISABLE_CUDNN_BATCH_NORM = True + + +def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, + padding, act): + """ + Convolution layer with batch normalization. + """ + conv_layer = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=num_channels_in, + num_filters=num_channels_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + if DISABLE_CUDNN_BATCH_NORM: + # temopary patch, need to be removed. + return paddle.layer.batch_norm( + input=conv_layer, act=act, batch_norm_type="batch_norm") + else: + return paddle.layer.batch_norm(input=conv_layer, act=act) + + +def bidirectional_simple_rnn_bn_layer(name, input, size, act): + """ + Bidirectonal simple rnn layer with sequence-wise batch normalization. + The batch normalization is only performed on input-state weights. + """ + # input-hidden weights shared across bi-direcitonal rnn. + input_proj = paddle.layer.fc( + input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) + # batch norm is only performed on input-state projection + if DISABLE_CUDNN_BATCH_NORM: + # temopary patch, need to be removed. 
+ input_proj_bn = paddle.layer.batch_norm( + input=input_proj, + act=paddle.activation.Linear(), + batch_norm_type="batch_norm") + else: + input_proj_bn = paddle.layer.batch_norm( + input=input_proj, act=paddle.activation.Linear()) + # forward and backward in time + forward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn, act=act, reverse=False) + backward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn, act=act, reverse=True) + return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) + + +def conv_group(input, num_stacks): + """ + Convolution group with several stacking convolution layers. + """ + conv = conv_bn_layer( + input=input, + filter_size=(11, 41), + num_channels_in=1, + num_channels_out=32, + stride=(3, 2), + padding=(5, 20), + act=paddle.activation.BRelu()) + for i in xrange(num_stacks - 1): + conv = conv_bn_layer( + input=conv, + filter_size=(11, 21), + num_channels_in=32, + num_channels_out=32, + stride=(1, 2), + padding=(5, 10), + act=paddle.activation.BRelu()) + output_num_channels = 32 + output_height = 160 // pow(2, num_stacks) + 1 + return conv, output_num_channels, output_height + + +def rnn_group(input, size, num_stacks): + """ + RNN group with several stacking RNN layers. + """ + output = input + for i in xrange(num_stacks): + output = bidirectional_simple_rnn_bn_layer( + name=str(i), input=output, size=size, act=paddle.activation.BRelu()) + return output + + +def deep_speech2(audio_data, + text_data, + dict_size, + num_conv_layers=2, + num_rnn_layers=3, + rnn_size=256): + """ + The whole DeepSpeech2 model structure (a simplified version). + + :param audio_data: Audio spectrogram data layer. + :type audio_data: LayerOutput + :param text_data: Transcription text data layer. + :type text_data: LayerOutput + :param dict_size: Dictionary size for tokenized transcription. + :type dict_size: int + :param num_conv_layers: Number of stacking convolution layers. + :type num_conv_layers: int + :param num_rnn_layers: Number of stacking RNN layers. + :type num_rnn_layers: int + :param rnn_size: RNN layer size (number of RNN cells). + :type rnn_size: int + :param is_inference: False in the training mode, and True in the + inferene mode. + :type is_inference: bool + :return: If is_inference set False, return a ctc cost layer; + if is_inference set True, return a sequence layer of output + probability distribution. 
+ :rtype: tuple of LayerOutput + """ + # convolution group + conv_group_output, conv_group_num_channels, conv_group_height = conv_group( + input=audio_data, num_stacks=num_conv_layers) + # convert data form convolution feature map to sequence of vectors + conv2seq = paddle.layer.block_expand( + input=conv_group_output, + num_channels=conv_group_num_channels, + stride_x=1, + stride_y=1, + block_x=1, + block_y=conv_group_height) + # rnn group + rnn_group_output = rnn_group( + input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) + fc = paddle.layer.fc( + input=rnn_group_output, + size=dict_size + 1, + act=paddle.activation.Linear(), + bias_attr=True) + # probability distribution with softmax + log_probs = paddle.layer.mixed( + input=paddle.layer.identity_projection(input=fc), + act=paddle.activation.Softmax()) + # ctc cost + ctc_loss = paddle.layer.warp_ctc( + input=fc, + label=text_data, + size=dict_size + 1, + blank=dict_size, + norm_by_times=True) + return log_probs, ctc_loss diff --git a/model.py b/model.py index cb0b4ecbb..d1efabb75 100644 --- a/model.py +++ b/model.py @@ -3,141 +3,150 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import sys +import os +import time +import gzip +from decoder import * +from lm.lm_scorer import LmScorer import paddle.v2 as paddle +from layer import * -def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, - padding, act): - """ - Convolution layer with batch normalization. - """ - conv_layer = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=num_channels_in, - num_filters=num_channels_out, - stride=stride, - padding=padding, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.batch_norm(input=conv_layer, act=act) +class DeepSpeech2Model(object): + def __init__(self, vocab_size, num_conv_layers, num_rnn_layers, + rnn_layer_size, pretrained_model_path): + self._create_network(vocab_size, num_conv_layers, num_rnn_layers, + rnn_layer_size) + self._create_parameters(pretrained_model_path) + self._inferer = None + self._ext_scorer = None + def train(self, + train_batch_reader, + dev_batch_reader, + feeding_dict, + learning_rate, + gradient_clipping, + num_passes, + num_iterations_print=100, + output_model_dir='checkpoints'): + # prepare optimizer and trainer + optimizer = paddle.optimizer.Adam( + learning_rate=learning_rate, + gradient_clipping_threshold=gradient_clipping) + trainer = paddle.trainer.SGD( + cost=self._loss, + parameters=self._parameters, + update_equation=optimizer) -def bidirectional_simple_rnn_bn_layer(name, input, size, act): - """ - Bidirectonal simple rnn layer with sequence-wise batch normalization. - The batch normalization is only performed on input-state weights. - """ - # input-hidden weights shared across bi-direcitonal rnn. 
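With `deep_speech2` now returning both the softmax output and the CTC cost, the per-frame probabilities can be decoded either by beam search or by the greedy best-path rule: take the argmax per frame, collapse repeats, and remove the blank (whose id is the last vocabulary index). An illustrative Python sketch of that rule, separate from the repository's decoders:

```
def best_path_decode(probs_seq, vocab_list):
    """Greedy CTC decode: argmax per frame, merge repeats, drop blanks."""
    blank_id = len(vocab_list)                 # blank is the last index
    best_path = [max(range(len(frame)), key=frame.__getitem__)
                 for frame in probs_seq]
    tokens, prev = [], None
    for idx in best_path:
        if idx != prev and idx != blank_id:    # collapse repeats, skip blank
            tokens.append(vocab_list[idx])
        prev = idx
    return ''.join(tokens)

# Toy 4-frame example over vocabulary ['a', 'b'] with blank id 2.
probs = [[0.1, 0.1, 0.8],   # blank
         [0.7, 0.2, 0.1],   # 'a'
         [0.6, 0.3, 0.1],   # 'a' again, merged with the previous frame
         [0.1, 0.8, 0.1]]   # 'b'
print(best_path_decode(probs, ['a', 'b']))     # "ab"
```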
- input_proj = paddle.layer.fc( - input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) - # batch norm is only performed on input-state projection - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, act=paddle.activation.Linear()) - # forward and backward in time - forward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=False) - backward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=True) - return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) + # create event handler + def event_handler(event): + global start_time, cost_sum, cost_counter + if isinstance(event, paddle.event.EndIteration): + cost_sum += event.cost + cost_counter += 1 + if (event.batch_id + 1) % num_iterations_print == 0: + output_model_path = os.path.join(output_model_dir, + "params.latest.tar.gz") + with gzip.open(output_model_path, 'w') as f: + self._parameters.to_tar(f) + print("\nPass: %d, Batch: %d, TrainCost: %f" % + (event.pass_id, event.batch_id + 1, + cost_sum / cost_counter)) + cost_sum, cost_counter = 0.0, 0 + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.BeginPass): + start_time = time.time() + cost_sum, cost_counter = 0.0, 0 + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=dev_batch_reader, feeding=feeding_dict) + output_model_path = os.path.join( + output_model_dir, "params.pass-%d.tar.gz" % event.pass_id) + with gzip.open(output_model_path, 'w') as f: + self._parameters.to_tar(f) + print("\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % + (time.time() - start_time, event.pass_id, result.cost)) + # run train + trainer.train( + reader=train_batch_reader, + event_handler=event_handler, + num_passes=num_passes, + feeding=feeding_dict) -def conv_group(input, num_stacks): - """ - Convolution group with several stacking convolution layers. 
- """ - conv = conv_bn_layer( - input=input, - filter_size=(11, 41), - num_channels_in=1, - num_channels_out=32, - stride=(3, 2), - padding=(5, 20), - act=paddle.activation.BRelu()) - for i in xrange(num_stacks - 1): - conv = conv_bn_layer( - input=conv, - filter_size=(11, 21), - num_channels_in=32, - num_channels_out=32, - stride=(1, 2), - padding=(5, 10), - act=paddle.activation.BRelu()) - output_num_channels = 32 - output_height = 160 // pow(2, num_stacks) + 1 - return conv, output_num_channels, output_height + def infer_batch(self, infer_data, decode_method, beam_alpha, beam_beta, + beam_size, cutoff_prob, vocab_list, language_model_path, + num_processes): + # define inferer + if self._inferer == None: + self._inferer = paddle.inference.Inference( + output_layer=self._log_probs, parameters=self._parameters) + # run inference + infer_results = self._inferer.infer(input=infer_data) + num_steps = len(infer_results) // len(infer_data) + probs_split = [ + infer_results[i * num_steps:(i + 1) * num_steps] + for i in xrange(0, len(infer_data)) + ] + # run decoder + results = [] + if decode_method == "best_path": + # best path decode + for i, probs in enumerate(probs_split): + output_transcription = ctc_best_path_decoder( + probs_seq=probs, vocabulary=data_generator.vocab_list) + results.append(output_transcription) + elif decode_method == "beam_search": + # initialize external scorer + if self._ext_scorer == None: + self._ext_scorer = LmScorer(beam_alpha, beam_beta, + language_model_path) + self._loaded_lm_path = language_model_path + else: + self._ext_scorer.reset_params(beam_alpha, beam_beta) + assert self._loaded_lm_path == language_model_path + # beam search decode + beam_search_results = ctc_beam_search_decoder_batch( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=beam_size, + blank_id=len(vocab_list), + num_processes=num_processes, + ext_scoring_func=self._ext_scorer, + cutoff_prob=cutoff_prob) + results = [result[0][1] for result in beam_search_results] + else: + raise ValueError("Decoding method [%s] is not supported." % + decode_method) + return results -def rnn_group(input, size, num_stacks): - """ - RNN group with several stacking RNN layers. - """ - output = input - for i in xrange(num_stacks): - output = bidirectional_simple_rnn_bn_layer( - name=str(i), input=output, size=size, act=paddle.activation.BRelu()) - return output + def _create_parameters(self, model_path=None): + if model_path is None: + self._parameters = paddle.parameters.create(self._loss) + else: + self._parameters = paddle.parameters.Parameters.from_tar( + gzip.open(model_path)) - -def deep_speech2(audio_data, - text_data, - dict_size, - num_conv_layers=2, - num_rnn_layers=3, - rnn_size=256, - is_inference=False): - """ - The whole DeepSpeech2 model structure (a simplified version). - - :param audio_data: Audio spectrogram data layer. - :type audio_data: LayerOutput - :param text_data: Transcription text data layer. - :type text_data: LayerOutput - :param dict_size: Dictionary size for tokenized transcription. - :type dict_size: int - :param num_conv_layers: Number of stacking convolution layers. - :type num_conv_layers: int - :param num_rnn_layers: Number of stacking RNN layers. - :type num_rnn_layers: int - :param rnn_size: RNN layer size (number of RNN cells). - :type rnn_size: int - :param is_inference: False in the training mode, and True in the - inferene mode. 
- :type is_inference: bool - :return: If is_inference set False, return a ctc cost layer; - if is_inference set True, return a sequence layer of output - probability distribution. - :rtype: tuple of LayerOutput - """ - # convolution group - conv_group_output, conv_group_num_channels, conv_group_height = conv_group( - input=audio_data, num_stacks=num_conv_layers) - # convert data form convolution feature map to sequence of vectors - conv2seq = paddle.layer.block_expand( - input=conv_group_output, - num_channels=conv_group_num_channels, - stride_x=1, - stride_y=1, - block_x=1, - block_y=conv_group_height) - # rnn group - rnn_group_output = rnn_group( - input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) - fc = paddle.layer.fc( - input=rnn_group_output, - size=dict_size + 1, - act=paddle.activation.Linear(), - bias_attr=True) - if is_inference: - # probability distribution with softmax - return paddle.layer.mixed( - input=paddle.layer.identity_projection(input=fc), - act=paddle.activation.Softmax()) - else: - # ctc cost - return paddle.layer.warp_ctc( - input=fc, - label=text_data, - size=dict_size + 1, - blank=dict_size, - norm_by_times=True) + def _create_network(self, vocab_size, num_conv_layers, num_rnn_layers, + rnn_layer_size): + # paddle.data_type.dense_array is used for variable batch input. + # The size 161 * 161 is only an placeholder value and the real shape + # of input batch data will be induced during training. + audio_data = paddle.layer.data( + name="audio_spectrogram", + type=paddle.data_type.dense_array(161 * 161)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(vocab_size)) + self._log_probs, self._loss = deep_speech2( + audio_data=audio_data, + text_data=text_data, + dict_size=vocab_size, + num_conv_layers=num_conv_layers, + num_rnn_layers=num_rnn_layers, + rnn_size=rnn_layer_size) diff --git a/train.py b/train.py index 6481074c6..45f7a6d9d 100644 --- a/train.py +++ b/train.py @@ -3,15 +3,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys -import os import argparse -import gzip -import time import distutils.util import multiprocessing import paddle.v2 as paddle -from model import deep_speech2 +from model import DeepSpeech2Model from data_utils.data import DataGenerator import utils @@ -23,6 +19,12 @@ parser.add_argument( default=200, type=int, help="Training pass number. (default: %(default)s)") +parser.add_argument( + "--num_iterations_print", + default=100, + type=int, + help="Number of iterations for every train cost printing. " + "(default: %(default)s)") parser.add_argument( "--num_conv_layers", default=2, @@ -127,100 +129,47 @@ args = parser.parse_args() def train(): """DeepSpeech2 training.""" - - # initialize data generator - def data_generator(): - return DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, - augmentation_config=args.augmentation_config, - max_duration=args.max_duration, - min_duration=args.min_duration, - specgram_type=args.specgram_type, - num_threads=args.num_threads_data) - - train_generator = data_generator() - test_generator = data_generator() - - # create network config - # paddle.data_type.dense_array is used for variable batch input. - # The size 161 * 161 is only an placeholder value and the real shape - # of input batch data will be induced during training. 
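For orientation, the `feeding_dict` handed to `DeepSpeech2Model.train` is only a name-to-tuple-index map tying each slot of the reader's output to the data layers declared in `_create_network` above; assuming the usual two-field reader output, it amounts to something like:

```
# Each reader item is (spectrogram, token_ids); feeding tells the trainer
# which tuple slot feeds which declared data layer (names assumed from
# _create_network above).
feeding = {
    "audio_spectrogram": 0,   # 2-D feature matrix -> dense_array layer
    "transcript_text": 1,     # token id list -> integer_value_sequence layer
}
```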
- audio_data = paddle.layer.data( - name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) - text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence( - train_generator.vocab_size)) - cost = deep_speech2( - audio_data=audio_data, - text_data=text_data, - dict_size=train_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - is_inference=False) - - # create/load parameters and optimizer - if args.init_model_path is None: - parameters = paddle.parameters.create(cost) - else: - if not os.path.isfile(args.init_model_path): - raise IOError("Invalid model!") - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(args.init_model_path)) - optimizer = paddle.optimizer.Adam( - learning_rate=args.adam_learning_rate, gradient_clipping_threshold=400) - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=optimizer) - - # prepare data reader + train_generator = DataGenerator( + vocab_filepath=args.vocab_filepath, + mean_std_filepath=args.mean_std_filepath, + augmentation_config=args.augmentation_config, + max_duration=args.max_duration, + min_duration=args.min_duration, + specgram_type=args.specgram_type, + num_threads=args.num_threads_data) + dev_generator = DataGenerator( + vocab_filepath=args.vocab_filepath, + mean_std_filepath=args.mean_std_filepath, + augmentation_config="{}", + specgram_type=args.specgram_type, + num_threads=args.num_threads_data) train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest_path, batch_size=args.batch_size, min_batch_size=args.trainer_count, sortagrad=args.use_sortagrad if args.init_model_path is None else False, shuffle_method=args.shuffle_method) - test_batch_reader = test_generator.batch_reader_creator( + dev_batch_reader = dev_generator.batch_reader_creator( manifest_path=args.dev_manifest_path, batch_size=args.batch_size, min_batch_size=1, # must be 1, but will have errors. 
sortagrad=False, shuffle_method=None) - # create event handler - def event_handler(event): - global start_time, cost_sum, cost_counter - if isinstance(event, paddle.event.EndIteration): - cost_sum += event.cost - cost_counter += 1 - if (event.batch_id + 1) % 100 == 0: - print("\nPass: %d, Batch: %d, TrainCost: %f" % ( - event.pass_id, event.batch_id + 1, cost_sum / cost_counter)) - cost_sum, cost_counter = 0.0, 0 - with gzip.open("checkpoints/params.latest.tar.gz", 'w') as f: - parameters.to_tar(f) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.BeginPass): - start_time = time.time() - cost_sum, cost_counter = 0.0, 0 - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=test_batch_reader, feeding=test_generator.feeding) - print("\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % - (time.time() - start_time, event.pass_id, result.cost)) - with gzip.open("checkpoints/params.pass-%d.tar.gz" % event.pass_id, - 'w') as f: - parameters.to_tar(f) - - # run train - trainer.train( - reader=train_batch_reader, - event_handler=event_handler, + ds2_model = DeepSpeech2Model( + vocab_size=train_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_layer_size=args.rnn_layer_size, + pretrained_model_path=args.init_model_path) + ds2_model.train( + train_batch_reader=train_batch_reader, + dev_batch_reader=dev_batch_reader, + feeding_dict=train_generator.feeding, + learning_rate=args.adam_learning_rate, + gradient_clipping=400, num_passes=args.num_passes, - feeding=train_generator.feeding) + num_iterations_print=args.num_iterations_print) def main(): diff --git a/tune.py b/tune.py index 2fcca4862..f414622e3 100644 --- a/tune.py +++ b/tune.py @@ -3,14 +3,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np import distutils.util import argparse -import gzip +import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator -from model import deep_speech2 -from decoder import * -from lm.lm_scorer import LmScorer +from model import DeepSpeech2Model from error_rate import wer import utils @@ -40,6 +39,11 @@ parser.add_argument( default=True, type=distutils.util.strtobool, help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--trainer_count", + default=8, + type=int, + help="Trainer number. (default: %(default)s)") parser.add_argument( "--num_threads_data", default=multiprocessing.cpu_count(), @@ -62,10 +66,10 @@ parser.add_argument( type=str, help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( - "--decode_manifest_path", + "--tune_manifest_path", default='datasets/manifest.test', type=str, - help="Manifest path for decoding. (default: %(default)s)") + help="Manifest path for tuning. 
(default: %(default)s)") parser.add_argument( "--model_filepath", default='checkpoints/params.latest.tar.gz', @@ -127,96 +131,64 @@ args = parser.parse_args() def tune(): """Tune parameters alpha and beta on one minibatch.""" - if not args.num_alphas >= 0: raise ValueError("num_alphas must be non-negative!") - if not args.num_betas >= 0: raise ValueError("num_betas must be non-negative!") - # initialize data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_threads_data) - - # create network config - # paddle.data_type.dense_array is used for variable batch input. - # The size 161 * 161 is only an placeholder value and the real shape - # of input batch data will be induced during training. - audio_data = paddle.layer.data( - name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) - text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) - output_probs = deep_speech2( - audio_data=audio_data, - text_data=text_data, - dict_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - is_inference=True) - - # load parameters - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(args.model_filepath)) - - # prepare infer data batch_reader = data_generator.batch_reader_creator( - manifest_path=args.decode_manifest_path, + manifest_path=args.tune_manifest_path, batch_size=args.num_samples, sortagrad=False, shuffle_method=None) - # get one batch data for tuning - infer_data = batch_reader().next() - - # run inference - infer_results = paddle.infer( - output_layer=output_probs, parameters=parameters, input=infer_data) - num_steps = len(infer_results) // len(infer_data) - probs_split = [ - infer_results[i * num_steps:(i + 1) * num_steps] - for i in xrange(0, len(infer_data)) + tune_data = batch_reader().next() + target_transcripts = [ + ''.join([data_generator.vocab_list[token] for token in transcript]) + for _, transcript in tune_data ] + ds2_model = DeepSpeech2Model( + vocab_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_layer_size=args.rnn_layer_size, + pretrained_model_path=args.model_filepath) + # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) params_grid = [(alpha, beta) for alpha in cand_alphas for beta in cand_betas] - ext_scorer = LmScorer(args.alpha_from, args.beta_from, - args.language_model_path) ## tune parameters in loop for alpha, beta in params_grid: - wer_sum, wer_counter = 0, 0 - # reset scorer - ext_scorer.reset_params(alpha, beta) - # beam search using multiple processes - beam_search_results = ctc_beam_search_decoder_batch( - probs_split=probs_split, - vocabulary=data_generator.vocab_list, + result_transcripts = ds2_model.infer_batch( + infer_data=tune_data, + decode_method='beam_search', + beam_alpha=alpha, + beam_beta=beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - blank_id=len(data_generator.vocab_list), - num_processes=args.num_processes_beam_search, - ext_scoring_func=ext_scorer, ) - for i, beam_search_result in enumerate(beam_search_results): - target_transcription = ''.join([ - data_generator.vocab_list[index] for index in 
infer_data[i][1] - ]) - wer_sum += wer(target_transcription, beam_search_result[0][1]) - wer_counter += 1 - + vocab_list=data_generator.vocab_list, + language_model_path=args.language_model_path, + num_processes=args.num_processes_beam_search) + wer_sum, num_ins = 0.0, 0 + for target, result in zip(target_transcripts, result_transcripts): + wer_sum += wer(target, result) + num_ins += 1 print("alpha = %f\tbeta = %f\tWER = %f" % - (alpha, beta, wer_sum / wer_counter)) + (alpha, beta, wer_sum / num_ins)) def main(): - paddle.init(use_gpu=args.use_gpu, trainer_count=1) + utils.print_arguments(args) + paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) tune() From 5e20dfd4fb67eb10d64baede976c06314c5e8d37 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 1 Aug 2017 17:30:30 +0800 Subject: [PATCH 101/335] change the wget method in run.sh of deep_speech2 --- datasets/librispeech/librispeech.py | 4 ++-- requirements.txt | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/datasets/librispeech/librispeech.py b/datasets/librispeech/librispeech.py index 87e52ae4a..7e941f0ea 100644 --- a/datasets/librispeech/librispeech.py +++ b/datasets/librispeech/librispeech.py @@ -11,7 +11,7 @@ from __future__ import print_function import distutils.util import os -import wget +import sys import tarfile import argparse import soundfile @@ -66,7 +66,7 @@ def download(url, md5sum, target_dir): filepath = os.path.join(target_dir, url.split("/")[-1]) if not (os.path.exists(filepath) and md5file(filepath) == md5sum): print("Downloading %s ..." % url) - wget.download(url, target_dir) + os.system("wget -c " + url + " -P " + target_dir) print("\nMD5 Chesksum %s ..." % filepath) if not md5file(filepath) == md5sum: raise RuntimeError("MD5 checksum failed.") diff --git a/requirements.txt b/requirements.txt index eb6022599..131f75ff4 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -wget==3.2 scipy==0.13.1 resampy==0.1.5 SoundFile==0.9.0.post1 From a48469b9b6debe5f7f4b0160c8dd402812228175 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 2 Aug 2017 15:24:29 +0800 Subject: [PATCH 102/335] add the requirement for cuDNN version in README --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 22d0c5386..62b051714 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,20 @@ ## Installation -Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. +### Prerequisites + + - **Python = 2.7** only supported; + - **cuDNN >= 6.0** is required to utilize NVIDIA GPU platform in the installation of PaddlePaddle, and the **CUDA toolkit** with proper version suitable for cuDNN. The cuDNN library below 6.0 is found to yield a fatal error in batch normalization when handling utterances with long duration in inference. + +### Setup ``` sh setup.sh export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib:$LD_LIBRARY_PATH ``` + +Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. + ## Usage ### Preparing Data From 526e18b11964b00ced661e0119244d7bf8e0229a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 2 Aug 2017 23:50:46 +0800 Subject: [PATCH 103/335] Add function docs for layer.py and model.py and update other details. 
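Related to the download change in `librispeech.py` above (plain `wget -c` followed by an MD5 check), the checksum side can be written in a few lines of Python. This is an illustrative sketch with a placeholder digest and filename, not the project's `md5file` helper:

```
import hashlib

def md5file(path):
    """MD5 hex digest of a file, read in 1 MiB chunks."""
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            md5.update(chunk)
    return md5.hexdigest()

expected = "0123456789abcdef0123456789abcdef"   # placeholder digest
if md5file("train-clean-100.tar.gz") != expected:
    raise RuntimeError("MD5 checksum failed.")
```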
--- decoder.py | 2 +- infer.py | 2 +- layer.py | 84 ++++++++++++++++++++++++++++++++++-------------------- model.py | 74 +++++++++++++++++++++++++++++++++++++++++++++-- setup.sh | 3 -- train.py | 8 +++++- tune.py | 4 +-- 7 files changed, 136 insertions(+), 41 deletions(-) diff --git a/decoder.py b/decoder.py index a1fadc2c8..8f2e0508d 100644 --- a/decoder.py +++ b/decoder.py @@ -205,9 +205,9 @@ def ctc_beam_search_decoder_batch(probs_split, :type num_processes: int :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. + :type cutoff_prob: float :param num_processes: Number of parallel processes. :type num_processes: int - :type cutoff_prob: float :param ext_scoring_func: External scoring function for partially decoded sentence, e.g. word count or language model. diff --git a/infer.py b/infer.py index ec65cc748..bc77dab70 100644 --- a/infer.py +++ b/infer.py @@ -40,7 +40,7 @@ parser.add_argument( help="Use gpu or not. (default: %(default)s)") parser.add_argument( "--num_threads_data", - default=multiprocessing.cpu_count(), + default=1, type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( diff --git a/layer.py b/layer.py index 7b0273389..3b492645d 100644 --- a/layer.py +++ b/layer.py @@ -5,13 +5,27 @@ from __future__ import print_function import paddle.v2 as paddle -DISABLE_CUDNN_BATCH_NORM = True - def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, padding, act): - """ - Convolution layer with batch normalization. + """Convolution layer with batch normalization. + + :param input: Input layer. + :type input: LayerOutput + :param filter_size: The x dimension of a filter kernel. Or input a tuple for + two image dimension. + :type filter_size: int|tuple|list + :param num_channels_in: Number of input channels. + :type num_channels_in: int + :type num_channels_out: Number of output channels. + :type num_channels_in: out + :param padding: The x dimension of the padding. Or input a tuple for two + image dimension. + :type padding: int|tuple|list + :param act: Activation type. + :type act: BaseActivation + :return: Batch norm layer after convolution layer. + :rtype: LayerOutput """ conv_layer = paddle.layer.img_conv( input=input, @@ -22,32 +36,30 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, padding=padding, act=paddle.activation.Linear(), bias_attr=False) - if DISABLE_CUDNN_BATCH_NORM: - # temopary patch, need to be removed. - return paddle.layer.batch_norm( - input=conv_layer, act=act, batch_norm_type="batch_norm") - else: - return paddle.layer.batch_norm(input=conv_layer, act=act) + return paddle.layer.batch_norm(input=conv_layer, act=act) def bidirectional_simple_rnn_bn_layer(name, input, size, act): - """ - Bidirectonal simple rnn layer with sequence-wise batch normalization. + """Bidirectonal simple rnn layer with sequence-wise batch normalization. The batch normalization is only performed on input-state weights. + + :param name: Name of the layer. + :type name: string + :param input: Input layer. + :type input: LayerOutput + :param size: Number of RNN cells. + :type size: int + :param act: Activation type. + :type act: BaseActivation + :return: Bidirectional simple rnn layer. + :rtype: LayerOutput """ # input-hidden weights shared across bi-direcitonal rnn. 
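The convolution shapes reported in the training log quoted earlier (h = 81 for `__conv_0__`, h = 41 for `__conv_1__`) follow from the standard output-size formula applied to the frequency axis, assuming 161 spectrogram bins (a 20 ms window on 16 kHz audio) and the filter/stride/padding values used by `conv_group`. A small, purely illustrative check:

```
def conv_out_size(in_size, filter_size, stride, padding):
    """Standard convolution output-size formula."""
    return (in_size + 2 * padding - filter_size) // stride + 1

freq_bins = 320 // 2 + 1                    # 161 bins: 20 ms window at 16 kHz
h0 = conv_out_size(freq_bins, 41, 2, 20)    # 81, cf. "__conv_0__: ... h = 81"
h1 = conv_out_size(h0, 21, 2, 10)           # 41, cf. "__conv_1__: ... h = 41"
print(freq_bins, h0, h1)                    # 161 81 41
```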
input_proj = paddle.layer.fc( input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) # batch norm is only performed on input-state projection - if DISABLE_CUDNN_BATCH_NORM: - # temopary patch, need to be removed. - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, - act=paddle.activation.Linear(), - batch_norm_type="batch_norm") - else: - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, act=paddle.activation.Linear()) + input_proj_bn = paddle.layer.batch_norm( + input=input_proj, act=paddle.activation.Linear()) # forward and backward in time forward_simple_rnn = paddle.layer.recurrent( input=input_proj_bn, act=act, reverse=False) @@ -57,8 +69,14 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): def conv_group(input, num_stacks): - """ - Convolution group with several stacking convolution layers. + """Convolution group with stacked convolution layers. + + :param input: Input layer. + :type input: LayerOutput + :param num_stacks: Number of stacked convolution layers. + :type num_stacks: int + :return: Output layer of the convolution group. + :rtype: LayerOutput """ conv = conv_bn_layer( input=input, @@ -83,8 +101,16 @@ def conv_group(input, num_stacks): def rnn_group(input, size, num_stacks): - """ - RNN group with several stacking RNN layers. + """RNN group with stacked bidirectional simple RNN layers. + + :param input: Input layer. + :type input: LayerOutput + :param size: Number of RNN cells in each layer. + :type size: int + :param num_stacks: Number of stacked rnn layers. + :type num_stacks: int + :return: Output layer of the RNN group. + :rtype: LayerOutput """ output = input for i in xrange(num_stacks): @@ -114,12 +140,8 @@ def deep_speech2(audio_data, :type num_rnn_layers: int :param rnn_size: RNN layer size (number of RNN cells). :type rnn_size: int - :param is_inference: False in the training mode, and True in the - inferene mode. - :type is_inference: bool - :return: If is_inference set False, return a ctc cost layer; - if is_inference set True, return a sequence layer of output - probability distribution. + :return: A tuple of an output unnormalized log probability layer ( + before softmax) and a ctc cost layer. :rtype: tuple of LayerOutput """ # convolution group diff --git a/model.py b/model.py index d1efabb75..f5333f170 100644 --- a/model.py +++ b/model.py @@ -14,6 +14,21 @@ from layer import * class DeepSpeech2Model(object): + """DeepSpeech2Model class. + + :param vocab_size: Decoding vocabulary size. + :type vocab_size: int + :param num_conv_layers: Number of stacking convolution layers. + :type num_conv_layers: int + :param num_rnn_layers: Number of stacking RNN layers. + :type num_rnn_layers: int + :param rnn_layer_size: RNN layer size (number of RNN cells). + :type rnn_layer_size: int + :param pretrained_model_path: Pretrained model path. If None, will train + from stratch. + :type pretrained_model_path: basestring|None + """ + def __init__(self, vocab_size, num_conv_layers, num_rnn_layers, rnn_layer_size, pretrained_model_path): self._create_network(vocab_size, num_conv_layers, num_rnn_layers, @@ -29,8 +44,33 @@ class DeepSpeech2Model(object): learning_rate, gradient_clipping, num_passes, - num_iterations_print=100, - output_model_dir='checkpoints'): + output_model_dir, + num_iterations_print=100): + """Train the model. + + :param train_batch_reader: Train data reader. + :type train_batch_reader: callable + :param dev_batch_reader: Validation data reader. 
+ :type dev_batch_reader: callable + :param feeding_dict: Feeding is a map of field name and tuple index + of the data that reader returns. + :type feeding_dict: dict|list + :param learning_rate: Learning rate for ADAM optimizer. + :type learning_rate: float + :param gradient_clipping: Gradient clipping threshold. + :type gradient_clipping: float + :param num_passes: Number of training epochs. + :type num_passes: int + :param num_iterations_print: Number of training iterations for printing + a training loss. + :type rnn_iteratons_print: int + :param output_model_dir: Directory for saving the model (every pass). + :type output_model_dir: basestring + """ + # prepare model output directory + if not os.path.exists(output_model_dir): + os.mkdir(output_model_dir) + # prepare optimizer and trainer optimizer = paddle.optimizer.Adam( learning_rate=learning_rate, @@ -81,6 +121,34 @@ class DeepSpeech2Model(object): def infer_batch(self, infer_data, decode_method, beam_alpha, beam_beta, beam_size, cutoff_prob, vocab_list, language_model_path, num_processes): + """Model inference. Infer the transcription for a batch of speech + utterances. + + :param infer_data: List of utterances to infer, with each utterance a + tuple of audio features and transcription text (empty + string). + :type infer_data: list + :param decode_method: Decoding method name, 'best_path' or + 'beam search'. + :param decode_method: string + :param beam_alpha: Parameter associated with language model. + :type beam_alpha: float + :param beam_beta: Parameter associated with word count. + :type beam_beta: float + :param beam_size: Width for Beam search. + :type beam_size: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param vocab_list: List of tokens in the vocabulary, for decoding. + :type vocab_list: list + :param language_model_path: Filepath for language model. + :type language_model_path: basestring|None + :param num_processes: Number of processes (CPU) for decoder. + :type num_processes: int + :return: List of transcription texts. + :rtype: List of basestring + """ # define inferer if self._inferer == None: self._inferer = paddle.inference.Inference( @@ -126,6 +194,7 @@ class DeepSpeech2Model(object): return results def _create_parameters(self, model_path=None): + """Load or create model parameters.""" if model_path is None: self._parameters = paddle.parameters.create(self._loss) else: @@ -134,6 +203,7 @@ class DeepSpeech2Model(object): def _create_network(self, vocab_size, num_conv_layers, num_rnn_layers, rnn_layer_size): + """Create data layers and model network.""" # paddle.data_type.dense_array is used for variable batch input. # The size 161 * 161 is only an placeholder value and the real shape # of input batch data will be induced during training. diff --git a/setup.sh b/setup.sh index 4d451a6f1..7f4272550 100644 --- a/setup.sh +++ b/setup.sh @@ -26,7 +26,4 @@ if [ $? != 0 ]; then rm libsndfile-1.0.28.tar.gz fi -# prepare ./checkpoints -mkdir checkpoints - echo "Install all dependencies successfully." diff --git a/train.py b/train.py index 45f7a6d9d..080f57d2d 100644 --- a/train.py +++ b/train.py @@ -116,6 +116,11 @@ parser.add_argument( help="If set None, the training will start from scratch. " "Otherwise, the training will resume from " "the existing model of this path. (default: %(default)s)") +parser.add_argument( + "--output_model_dir", + default="./checkpoints", + type=str, + help="Directory for saving models. 
(default: %(default)s)") parser.add_argument( "--augmentation_config", default='[{"type": "shift", ' @@ -169,7 +174,8 @@ def train(): learning_rate=args.adam_learning_rate, gradient_clipping=400, num_passes=args.num_passes, - num_iterations_print=args.num_iterations_print) + num_iterations_print=args.num_iterations_print, + output_model_dir=args.output_model_dir) def main(): diff --git a/tune.py b/tune.py index f414622e3..a17be30fa 100644 --- a/tune.py +++ b/tune.py @@ -46,7 +46,7 @@ parser.add_argument( help="Trainer number. (default: %(default)s)") parser.add_argument( "--num_threads_data", - default=multiprocessing.cpu_count(), + default=1, type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( @@ -67,7 +67,7 @@ parser.add_argument( help="Manifest path for normalizer. (default: %(default)s)") parser.add_argument( "--tune_manifest_path", - default='datasets/manifest.test', + default='datasets/manifest.dev', type=str, help="Manifest path for tuning. (default: %(default)s)") parser.add_argument( From 0ebf36b98fb8484b44bb512e76f6b94a1799a1c2 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 2 Aug 2017 21:45:08 +0800 Subject: [PATCH 104/335] Add a realtime ASR demo for users to test their own voice with mic. --- data_utils/audio.py | 2 + data_utils/data.py | 29 +++--- demo_client.py | 75 ++++++++++++++++ demo_server.py | 208 ++++++++++++++++++++++++++++++++++++++++++++ infer.py | 13 +-- model.py | 10 +++ requirements.txt | 2 + 7 files changed, 320 insertions(+), 19 deletions(-) create mode 100644 demo_client.py create mode 100644 demo_server.py mode change 100755 => 100644 requirements.txt diff --git a/data_utils/audio.py b/data_utils/audio.py index 3891f5b92..29fdd0bd8 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -64,6 +64,8 @@ class AudioSegment(object): :rtype: AudioSegment """ samples, sample_rate = soundfile.read(file, dtype='float32') + print(samples) + print(sample_rate) return cls(samples, sample_rate) @classmethod diff --git a/data_utils/data.py b/data_utils/data.py index d01ca8cc7..fe064b806 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -83,6 +83,23 @@ class DataGenerator(object): self._rng = random.Random(random_seed) self._epoch = 0 + def process_utterance(self, filename, transcript): + """Load, augment, featurize and normalize for speech data. + + :param filename: Audio filepath + :type filename: basestring + :param transcript: Transcription text. + :type transcript: basestring + :return: Tuple of audio feature tensor and list of token ids for + transcription. + :rtype: tuple of (2darray, list) + """ + speech_segment = SpeechSegment.from_file(filename, transcript) + self._augmentation_pipeline.transform_audio(speech_segment) + specgram, text_ids = self._speech_featurizer.featurize(speech_segment) + specgram = self._normalizer.apply(specgram) + return specgram, text_ids + def batch_reader_creator(self, manifest_path, batch_size, @@ -198,14 +215,6 @@ class DataGenerator(object): """ return self._speech_featurizer.vocab_list - def _process_utterance(self, filename, transcript): - """Load, augment, featurize and normalize for speech data.""" - speech_segment = SpeechSegment.from_file(filename, transcript) - self._augmentation_pipeline.transform_audio(speech_segment) - specgram, text_ids = self._speech_featurizer.featurize(speech_segment) - specgram = self._normalizer.apply(specgram) - return specgram, text_ids - def _instance_reader_creator(self, manifest): """ Instance reader creator. 
Create a callable function to produce @@ -220,8 +229,8 @@ class DataGenerator(object): yield instance def mapper(instance): - return self._process_utterance(instance["audio_filepath"], - instance["text"]) + return self.process_utterance(instance["audio_filepath"], + instance["text"]) return paddle.reader.xmap_readers( mapper, reader, self._num_threads, 1024, order=True) diff --git a/demo_client.py b/demo_client.py new file mode 100644 index 000000000..97649fd48 --- /dev/null +++ b/demo_client.py @@ -0,0 +1,75 @@ +from pynput import keyboard +import struct +import socket +import sys +import pyaudio + +HOST, PORT = "10.104.18.14", 8086 + +is_recording = False +enable_trigger_record = True + + +def on_press(key): + global is_recording, enable_trigger_record + if key == keyboard.Key.space: + if (not is_recording) and enable_trigger_record: + sys.stdout.write("Start Recording ... ") + sys.stdout.flush() + is_recording = True + + +def on_release(key): + global is_recording, enable_trigger_record + if key == keyboard.Key.esc: + return False + elif key == keyboard.Key.space: + if is_recording == True: + is_recording = False + + +data_list = [] + + +def callback(in_data, frame_count, time_info, status): + global data_list, is_recording, enable_trigger_record + if is_recording: + data_list.append(in_data) + enable_trigger_record = False + elif len(data_list) > 0: + # Connect to server and send data + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((HOST, PORT)) + sent = ''.join(data_list) + sock.sendall(struct.pack('>i', len(sent)) + sent) + print('Speech[length=%d] Sent.' % len(sent)) + # Receive data from the server and shut down + received = sock.recv(1024) + print "Recognition Results: {}".format(received) + sock.close() + data_list = [] + enable_trigger_record = True + return (in_data, pyaudio.paContinue) + + +def main(): + p = pyaudio.PyAudio() + stream = p.open( + format=pyaudio.paInt32, + channels=1, + rate=16000, + input=True, + stream_callback=callback) + stream.start_stream() + + with keyboard.Listener( + on_press=on_press, on_release=on_release) as listener: + listener.join() + + stream.stop_stream() + stream.close() + p.terminate() + + +if __name__ == "__main__": + main() diff --git a/demo_server.py b/demo_server.py new file mode 100644 index 000000000..4a3feb138 --- /dev/null +++ b/demo_server.py @@ -0,0 +1,208 @@ +import os +import time +import argparse +import distutils.util +from time import gmtime, strftime +import SocketServer +import struct +import wave +import pyaudio +import paddle.v2 as paddle +from data_utils.data import DataGenerator +from model import DeepSpeech2Model +import utils + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--host_ip", + default="10.104.18.14", + type=str, + help="Server IP address. (default: %(default)s)") +parser.add_argument( + "--host_port", + default=8086, + type=int, + help="Server Port. (default: %(default)s)") +parser.add_argument( + "--speech_save_dir", + default="demo_cache", + type=str, + help="Directory for saving demo speech. (default: %(default)s)") +parser.add_argument( + "--vocab_filepath", + default='datasets/vocab/eng_vocab.txt', + type=str, + help="Vocabulary filepath. (default: %(default)s)") +parser.add_argument( + "--mean_std_filepath", + default='mean_std.npz', + type=str, + help="Manifest path for normalizer. 
(default: %(default)s)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") +parser.add_argument( + "--num_conv_layers", + default=2, + type=int, + help="Convolution layer number. (default: %(default)s)") +parser.add_argument( + "--num_rnn_layers", + default=3, + type=int, + help="RNN layer number. (default: %(default)s)") +parser.add_argument( + "--rnn_layer_size", + default=512, + type=int, + help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gpu", + default=True, + type=distutils.util.strtobool, + help="Use gpu or not. (default: %(default)s)") +parser.add_argument( + "--model_filepath", + default='checkpoints/params.latest.tar.gz', + type=str, + help="Model filepath. (default: %(default)s)") +parser.add_argument( + "--decode_method", + default='beam_search', + type=str, + help="Method for ctc decoding: best_path or beam_search. " + "(default: %(default)s)") +parser.add_argument( + "--beam_size", + default=500, + type=int, + help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--language_model_path", + default="lm/data/common_crawl_00.prune01111.trie.klm", + type=str, + help="Path for language model. (default: %(default)s)") +parser.add_argument( + "--alpha", + default=0.36, + type=float, + help="Parameter associated with language model. (default: %(default)f)") +parser.add_argument( + "--beta", + default=0.25, + type=float, + help="Parameter associated with word count. (default: %(default)f)") +parser.add_argument( + "--cutoff_prob", + default=0.99, + type=float, + help="The cutoff probability of pruning" + "in beam search. (default: %(default)f)") +args = parser.parse_args() + + +class AsrTCPServer(SocketServer.TCPServer): + def __init__(self, + server_address, + RequestHandlerClass, + speech_save_dir, + audio_process_handler, + bind_and_activate=True): + self.speech_save_dir = speech_save_dir + self.audio_process_handler = audio_process_handler + SocketServer.TCPServer.__init__( + self, server_address, RequestHandlerClass, bind_and_activate=True) + + +class AsrRequestHandler(SocketServer.BaseRequestHandler): + """The ASR request handler. + """ + + def handle(self): + # receive data through TCP socket + chunk = self.request.recv(1024) + target_len = struct.unpack('>i', chunk[:4])[0] + data = chunk[4:] + while len(data) < target_len: + chunk = self.request.recv(1024) + data += chunk + # write to file + filename = self._write_to_file(data) + + print("Received utterance[length=%d] from %s, saved to %s." 
% + (len(data), self.client_address[0], filename)) + #filename = "/home/work/.cache/paddle/dataset/speech/Libri/train-other-500/LibriSpeech/train-other-500/811/130143/811-130143-0025.flac" + start_time = time.time() + transcript = self.server.audio_process_handler(filename) + finish_time = time.time() + print("Response Time: %f, Transcript: %s" % + (finish_time - start_time, transcript)) + self.request.sendall(transcript) + + def _write_to_file(self, data): + # prepare save dir and filename + if not os.path.exists(self.server.speech_save_dir): + os.mkdir(self.server.speech_save_dir) + timestamp = strftime("%Y%m%d%H%M%S", gmtime()) + out_filename = os.path.join( + self.server.speech_save_dir, + timestamp + "_" + self.client_address[0] + "_" + ".wav") + # write to wav file + file = wave.open(out_filename, 'wb') + file.setnchannels(1) + file.setsampwidth(4) + file.setframerate(16000) + file.writeframes(data) + file.close() + return out_filename + + +def start_server(): + data_generator = DataGenerator( + vocab_filepath=args.vocab_filepath, + mean_std_filepath=args.mean_std_filepath, + augmentation_config='{}', + specgram_type=args.specgram_type, + num_threads=1) + ds2_model = DeepSpeech2Model( + vocab_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_layer_size=args.rnn_layer_size, + pretrained_model_path=args.model_filepath) + + def file_to_transcript(filename): + feature = data_generator.process_utterance(filename, "") + result_transcript = ds2_model.infer_batch( + infer_data=[feature], + decode_method=args.decode_method, + beam_alpha=args.alpha, + beam_beta=args.beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + vocab_list=data_generator.vocab_list, + language_model_path=args.language_model_path, + num_processes=1) + return result_transcript[0] + + server = AsrTCPServer( + server_address=(args.host_ip, args.host_port), + RequestHandlerClass=AsrRequestHandler, + speech_save_dir=args.speech_save_dir, + audio_process_handler=file_to_transcript) + + print("ASR Server Started.") + server.serve_forever() + + +def main(): + utils.print_arguments(args) + paddle.init(use_gpu=args.use_gpu, trainer_count=1) + start_server() + + +if __name__ == "__main__": + main() diff --git a/infer.py b/infer.py index bc77dab70..8fd27dce4 100644 --- a/infer.py +++ b/infer.py @@ -83,18 +83,13 @@ parser.add_argument( "--decode_method", default='beam_search', type=str, - help="Method for ctc decoding: best_path or beam_search. (default: %(default)s)" -) + help="Method for ctc decoding: best_path or beam_search. " + "(default: %(default)s)") parser.add_argument( "--beam_size", default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--num_results_per_sample", - default=1, - type=int, - help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", default="lm/data/common_crawl_00.prune01111.trie.klm", @@ -102,12 +97,12 @@ parser.add_argument( help="Path for language model. (default: %(default)s)") parser.add_argument( "--alpha", - default=0.26, + default=0.36, type=float, help="Parameter associated with language model. (default: %(default)f)") parser.add_argument( "--beta", - default=0.1, + default=0.25, type=float, help="Parameter associated with word count. 
(default: %(default)f)") parser.add_argument( diff --git a/model.py b/model.py index f5333f170..c8766deb1 100644 --- a/model.py +++ b/model.py @@ -35,6 +35,7 @@ class DeepSpeech2Model(object): rnn_layer_size) self._create_parameters(pretrained_model_path) self._inferer = None + self._loss_inferer = None self._ext_scorer = None def train(self, @@ -118,6 +119,14 @@ class DeepSpeech2Model(object): num_passes=num_passes, feeding=feeding_dict) + def infer_loss_batch(self, infer_data): + # define inferer + if self._loss_inferer == None: + self._loss_inferer = paddle.inference.Inference( + output_layer=self._loss, parameters=self._parameters) + # run inference + return self._loss_inferer.infer(input=infer_data) + def infer_batch(self, infer_data, decode_method, beam_alpha, beam_beta, beam_size, cutoff_prob, vocab_list, language_model_path, num_processes): @@ -187,6 +196,7 @@ class DeepSpeech2Model(object): num_processes=num_processes, ext_scoring_func=self._ext_scorer, cutoff_prob=cutoff_prob) + results = [result[0][1] for result in beam_search_results] else: raise ValueError("Decoding method [%s] is not supported." % diff --git a/requirements.txt b/requirements.txt old mode 100755 new mode 100644 index 131f75ff4..9297f659c --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,5 @@ resampy==0.1.5 SoundFile==0.9.0.post1 python_speech_features https://github.com/luotao1/kenlm/archive/master.zip +pyaudio +pynput From 6bc445f2359b91a28e15d9a5339e06f72b003c53 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 3 Aug 2017 11:58:09 +0800 Subject: [PATCH 105/335] refine the interface of decoders in swig --- deploy.py | 20 ++--- ...am_search_decoder.cpp => ctc_decoders.cpp} | 24 +++--- ...c_beam_search_decoder.h => ctc_decoders.h} | 11 ++- ...c_beam_search_decoder.i => ctc_decoders.i} | 6 +- deploy/decoder_setup.py | 16 ++-- deploy/scorer.cpp | 12 +-- deploy/scorer.h | 10 +-- deploy/swig_decoders.py | 86 +++++++++++++++++++ 8 files changed, 137 insertions(+), 48 deletions(-) rename deploy/{ctc_beam_search_decoder.cpp => ctc_decoders.cpp} (94%) rename deploy/{ctc_beam_search_decoder.h => ctc_decoders.h} (79%) rename deploy/{ctc_beam_search_decoder.i => ctc_decoders.i} (84%) create mode 100644 deploy/swig_decoders.py diff --git a/deploy.py b/deploy.py index 02152b499..70a9b9efe 100644 --- a/deploy.py +++ b/deploy.py @@ -10,8 +10,8 @@ import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 -from swig_ctc_beam_search_decoder import * -from swig_scorer import Scorer +from deploy.swig_decoders import * +from swig_scorer import LmScorer from error_rate import wer import utils import time @@ -85,7 +85,7 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="lm/data/en.00.UNKNOWN.klm", + default="lm/data/common_crawl_00.prune01111.trie.klm", type=str, help="Path for language model. 
(default: %(default)s)") parser.add_argument( @@ -164,19 +164,19 @@ def infer(): ] # external scorer - ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) ## decode and print time_begin = time.time() wer_sum, wer_counter = 0, 0 for i, probs in enumerate(probs_split): beam_result = ctc_beam_search_decoder( - probs.tolist(), - args.beam_size, - data_generator.vocab_list, - len(data_generator.vocab_list), - args.cutoff_prob, - ext_scorer, ) + probs_seq=probs, + beam_size=args.beam_size, + vocabulary=data_generator.vocab_list, + blank_id=len(data_generator.vocab_list), + cutoff_prob=args.cutoff_prob, + ext_scoring_func=ext_scorer, ) print("\nTarget Transcription:\t%s" % target_transcription[i]) print("Beam %d: %f \t%s" % (0, beam_result[0][0], beam_result[0][1])) diff --git a/deploy/ctc_beam_search_decoder.cpp b/deploy/ctc_decoders.cpp similarity index 94% rename from deploy/ctc_beam_search_decoder.cpp rename to deploy/ctc_decoders.cpp index af6414a97..4cff6d5e5 100644 --- a/deploy/ctc_beam_search_decoder.cpp +++ b/deploy/ctc_decoders.cpp @@ -4,9 +4,9 @@ #include #include #include -#include "ctc_beam_search_decoder.h" +#include "ctc_decoders.h" -typedef float log_prob_type; +typedef double log_prob_type; template bool pair_comp_first_rev(const std::pair a, const std::pair b) @@ -24,8 +24,8 @@ template T log_sum_exp(T x, T y) { static T num_min = -std::numeric_limits::max(); - if (x <= -num_min) return y; - if (y <= -num_min) return x; + if (x <= num_min) return y; + if (y <= num_min) return x; T xmax = std::max(x, y); return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; } @@ -55,17 +55,13 @@ std::string ctc_best_path_decoder(std::vector > probs_seq, } } max_idx_vec.push_back(max_idx); - std::cout< idx_vec; for (int i=0; i0) && max_idx_vec[i]!=max_idx_vec[i-1])) { - std::cout< > probs_seq, std::string best_path_result; for (int i=0; i > std::vector vocabulary, int blank_id, double cutoff_prob, - Scorer *ext_scorer, + LmScorer *ext_scorer, bool nproc) { // dimension check int num_time_steps = probs_seq.size(); for (int i=0; i vocabulary.size()) { - std::cout<<"Invalid blank_id!"< > vocabulary.end(), " "); int space_id = it - vocabulary.begin(); if(space_id >= vocabulary.size()) { - std::cout<<"The character space is not in the vocabulary!"< > std::vector vocabulary, int blank_id, double cutoff_prob=1.0, - Scorer *ext_scorer=NULL, + LmScorer *ext_scorer=NULL, bool nproc=false ); + /* CTC Best Path Decoder + * + * Parameters: + * probs_seq: 2-D vector that each element is a vector of probabilities + * over vocabulary of one time step. + * vocabulary: A vector of vocabulary. + * Return: + * A vector that each element is a pair of score and decoding result, + * in desending order. 
*/ std::string ctc_best_path_decoder(std::vector > probs_seq, std::vector vocabulary); diff --git a/deploy/ctc_beam_search_decoder.i b/deploy/ctc_decoders.i similarity index 84% rename from deploy/ctc_beam_search_decoder.i rename to deploy/ctc_decoders.i index 09e893d38..c7d05238e 100644 --- a/deploy/ctc_beam_search_decoder.i +++ b/deploy/ctc_decoders.i @@ -1,6 +1,6 @@ -%module swig_ctc_beam_search_decoder +%module swig_ctc_decoders %{ -#include "ctc_beam_search_decoder.h" +#include "ctc_decoders.h" %} %include "std_vector.i" @@ -19,4 +19,4 @@ namespace std{ } %import scorer.h -%include "ctc_beam_search_decoder.h" +%include "ctc_decoders.h" diff --git a/deploy/decoder_setup.py b/deploy/decoder_setup.py index 4ed603b25..aed45faaf 100644 --- a/deploy/decoder_setup.py +++ b/deploy/decoder_setup.py @@ -34,15 +34,13 @@ if compile_test('lzma.h', 'lzma'): ARGS.append('-DHAVE_XZLIB') LIBS.append('lzma') -os.system('swig -python -c++ ./ctc_beam_search_decoder.i') +os.system('swig -python -c++ ./ctc_decoders.i') ctc_beam_search_decoder_module = [ Extension( - name='_swig_ctc_beam_search_decoder', - sources=FILES + [ - 'scorer.cpp', 'ctc_beam_search_decoder_wrap.cxx', - 'ctc_beam_search_decoder.cpp' - ], + name='_swig_ctc_decoders', + sources=FILES + + ['scorer.cpp', 'ctc_decoders_wrap.cxx', 'ctc_decoders.cpp'], language='C++', include_dirs=['.', './kenlm'], libraries=LIBS, @@ -50,8 +48,8 @@ ctc_beam_search_decoder_module = [ ] setup( - name='swig_ctc_beam_search_decoder', + name='swig_ctc_decoders', version='0.1', - description="""CTC beam search decoder""", + description="""CTC decoders""", ext_modules=ctc_beam_search_decoder_module, - py_modules=['swig_ctc_beam_search_decoder'], ) + py_modules=['swig_ctc_decoders'], ) diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index e9a74b989..7a66daad9 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -7,7 +7,7 @@ using namespace lm::ngram; -Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { +LmScorer::LmScorer(float alpha, float beta, std::string lm_model_path) { this->_alpha = alpha; this->_beta = beta; @@ -18,7 +18,7 @@ Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { this->_language_model = LoadVirtual(lm_model_path.c_str()); } -Scorer::~Scorer(){ +LmScorer::~LmScorer(){ delete (lm::base::Model *)this->_language_model; } @@ -57,7 +57,7 @@ inline void strip(std::string &str, char ch=' ') { } } -int Scorer::word_count(std::string sentence) { +int LmScorer::word_count(std::string sentence) { strip(sentence); int cnt = 1; for (int i=0; i_language_model; State state, out_state; lm::FullScoreReturn ret; @@ -84,12 +84,12 @@ double Scorer::language_model_score(std::string sentence) { return log_prob; } -void Scorer::reset_params(float alpha, float beta) { +void LmScorer::reset_params(float alpha, float beta) { this->_alpha = alpha; this->_beta = beta; } -double Scorer::get_score(std::string sentence, bool log) { +double LmScorer::get_score(std::string sentence, bool log) { double lm_score = language_model_score(sentence); int word_cnt = word_count(sentence); diff --git a/deploy/scorer.h b/deploy/scorer.h index a18e119bc..90a1a84a0 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -8,10 +8,10 @@ * count and language model scoring. 
* Example: - * Scorer ext_scorer(alpha, beta, "path_to_language_model.klm"); + * LmScorer ext_scorer(alpha, beta, "path_to_language_model.klm"); * double score = ext_scorer.get_score("sentence_to_score"); */ -class Scorer{ +class LmScorer{ private: float _alpha; float _beta; @@ -23,9 +23,9 @@ private: double language_model_score(std::string); public: - Scorer(){} - Scorer(float alpha, float beta, std::string lm_model_path); - ~Scorer(); + LmScorer(){} + LmScorer(float alpha, float beta, std::string lm_model_path); + ~LmScorer(); // reset params alpha & beta void reset_params(float alpha, float beta); diff --git a/deploy/swig_decoders.py b/deploy/swig_decoders.py new file mode 100644 index 000000000..8e4a39252 --- /dev/null +++ b/deploy/swig_decoders.py @@ -0,0 +1,86 @@ +"""Wrapper for various CTC decoders in SWIG.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import swig_ctc_decoders +import multiprocessing + + +def ctc_best_path_decoder(probs_seq, vocabulary): + """Wrapper for ctc best path decoder in swig. + + :param probs_seq: 2-D list of probability distributions over each time + step, with each element being a list of normalized + probabilities over vocabulary and blank. + :type probs_seq: 2-D list + :param vocabulary: Vocabulary list. + :type vocabulary: list + :return: Decoding result string. + :rtype: basestring + """ + return swig_ctc_decoders.ctc_best_path_decoder(probs_seq.tolist(), + vocabulary) + + +def ctc_beam_search_decoder( + probs_seq, + beam_size, + vocabulary, + blank_id, + cutoff_prob=1.0, + ext_scoring_func=None, ): + """Wrapper for CTC Beam Search Decoder. + + :param probs_seq: 2-D list of probability distributions over each time + step, with each element being a list of normalized + probabilities over vocabulary and blank. + :type probs_seq: 2-D list + :param beam_size: Width for beam search. + :type beam_size: int + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param blank_id: ID of blank. + :type blank_id: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param ext_scoring_func: External scoring function for + partially decoded sentence, e.g. word count + or language model. + :type external_scoring_func: callable + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. + :rtype: list + """ + return swig_ctc_decoders.ctc_beam_search_decoder( + probs_seq.tolist(), beam_size, vocabulary, blank_id, cutoff_prob, + ext_scoring_func) + + +def ctc_beam_search_decoder_batch(probs_split, + beam_size, + vocabulary, + blank_id, + num_processes, + cutoff_prob=1.0, + ext_scoring_func=None): + """Wrapper for CTC beam search decoder in batch + """ + + # TODO: to resolve PicklingError + + if not num_processes > 0: + raise ValueError("Number of processes must be positive!") + + pool = multiprocessing.Pool(processes=num_processes) + results = [] + for i, probs_list in enumerate(probs_split): + args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, + ext_scoring_func) + results.append(pool.apply_async(ctc_beam_search_decoder, args)) + + pool.close() + pool.join() + beam_search_results = [result.get() for result in results] + return beam_search_results From cb9370f30862217c3666baabe94441fba72493a7 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 3 Aug 2017 12:00:24 +0800 Subject: [PATCH 106/335] Add warming-up to demo_server.py for DS2 and clean codes. 
--- data_utils/audio.py | 2 -- demo_server.py | 40 +++++++++++++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 29fdd0bd8..3891f5b92 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -64,8 +64,6 @@ class AudioSegment(object): :rtype: AudioSegment """ samples, sample_rate = soundfile.read(file, dtype='float32') - print(samples) - print(sample_rate) return cls(samples, sample_rate) @classmethod diff --git a/demo_server.py b/demo_server.py index 4a3feb138..85f694834 100644 --- a/demo_server.py +++ b/demo_server.py @@ -1,5 +1,6 @@ import os import time +import random import argparse import distutils.util from time import gmtime, strftime @@ -8,9 +9,10 @@ import struct import wave import pyaudio import paddle.v2 as paddle +from utils import print_arguments from data_utils.data import DataGenerator from model import DeepSpeech2Model -import utils +from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -38,6 +40,11 @@ parser.add_argument( default='mean_std.npz', type=str, help="Manifest path for normalizer. (default: %(default)s)") +parser.add_argument( + "--warmup_manifest_path", + default='datasets/manifest.test', + type=str, + help="Manifest path for warmup test. (default: %(default)s)") parser.add_argument( "--specgram_type", default='linear', @@ -77,7 +84,7 @@ parser.add_argument( "(default: %(default)s)") parser.add_argument( "--beam_size", - default=500, + default=100, type=int, help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( @@ -134,7 +141,6 @@ class AsrRequestHandler(SocketServer.BaseRequestHandler): print("Received utterance[length=%d] from %s, saved to %s." 
% (len(data), self.client_address[0], filename)) - #filename = "/home/work/.cache/paddle/dataset/speech/Libri/train-other-500/LibriSpeech/train-other-500/811/130143/811-130143-0025.flac" start_time = time.time() transcript = self.server.audio_process_handler(filename) finish_time = time.time() @@ -149,7 +155,7 @@ class AsrRequestHandler(SocketServer.BaseRequestHandler): timestamp = strftime("%Y%m%d%H%M%S", gmtime()) out_filename = os.path.join( self.server.speech_save_dir, - timestamp + "_" + self.client_address[0] + "_" + ".wav") + timestamp + "_" + self.client_address[0] + ".wav") # write to wav file file = wave.open(out_filename, 'wb') file.setnchannels(1) @@ -160,6 +166,22 @@ class AsrRequestHandler(SocketServer.BaseRequestHandler): return out_filename +def warm_up_test(audio_process_handler, + manifest_path, + num_test_cases, + random_seed=0): + manifest = read_manifest(manifest_path) + rng = random.Random(random_seed) + samples = rng.sample(manifest, num_test_cases) + for idx, sample in enumerate(samples): + print("Warm-up Test Case %d: %s", idx, sample['audio_filepath']) + start_time = time.time() + transcript = audio_process_handler(sample['audio_filepath']) + finish_time = time.time() + print("Response Time: %f, Transcript: %s" % + (finish_time - start_time, transcript)) + + def start_server(): data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, @@ -188,6 +210,14 @@ def start_server(): num_processes=1) return result_transcript[0] + print('-----------------------------------------------------------') + print('Warming up ...') + warm_up_test( + audio_process_handler=file_to_transcript, + manifest_path=args.warmup_manifest_path, + num_test_cases=3) + print('-----------------------------------------------------------') + server = AsrTCPServer( server_address=(args.host_ip, args.host_port), RequestHandlerClass=AsrRequestHandler, @@ -199,7 +229,7 @@ def start_server(): def main(): - utils.print_arguments(args) + print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=1) start_server() From a4c2dd7de2c6da129f34c8ae61db1655e812761a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 3 Aug 2017 12:08:11 +0800 Subject: [PATCH 107/335] Add function docs and comments to demo_server.py and demo_client.py. 
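As an illustrative sketch (not included in the diff below): the client and server documented here exchange data over a bare TCP socket — `demo_client.py` sends a 4-byte big-endian length prefix followed by the raw mono 16 kHz, 4-byte-sample recording, and the server replies with the recognized text. A minimal file-based client speaking the same framing might look like the following; the helper name `recognize_wav`, its defaults, and the use of a pre-recorded WAV file are assumptions for illustration only.

```
import socket
import struct
import wave


def recognize_wav(filename, host="localhost", port=8086):
    # The server saves what it receives as mono, 16 kHz, 4-byte-sample audio,
    # so the input file is assumed to already be in that format.
    wav_file = wave.open(filename, 'rb')
    pcm_bytes = wav_file.readframes(wav_file.getnframes())
    wav_file.close()
    # Same framing as demo_client.py: 4-byte big-endian length, then payload.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, port))
    sock.sendall(struct.pack('>i', len(pcm_bytes)) + pcm_bytes)
    transcript = sock.recv(1024)
    sock.close()
    return transcript
```

Anything that honors this framing can drive the server, which is convenient for scripted testing without a microphone.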
--- demo_client.py | 6 ++++++ demo_server.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/demo_client.py b/demo_client.py index 97649fd48..a789d8164 100644 --- a/demo_client.py +++ b/demo_client.py @@ -11,6 +11,7 @@ enable_trigger_record = True def on_press(key): + """On-press keyboard callback function.""" global is_recording, enable_trigger_record if key == keyboard.Key.space: if (not is_recording) and enable_trigger_record: @@ -20,6 +21,7 @@ def on_press(key): def on_release(key): + """On-release keyboard callback function.""" global is_recording, enable_trigger_record if key == keyboard.Key.esc: return False @@ -32,6 +34,7 @@ data_list = [] def callback(in_data, frame_count, time_info, status): + """Audio recorder's stream callback function.""" global data_list, is_recording, enable_trigger_record if is_recording: data_list.append(in_data) @@ -53,6 +56,7 @@ def callback(in_data, frame_count, time_info, status): def main(): + # prepare audio recorder p = pyaudio.PyAudio() stream = p.open( format=pyaudio.paInt32, @@ -62,10 +66,12 @@ def main(): stream_callback=callback) stream.start_stream() + # prepare keyboard listener with keyboard.Listener( on_press=on_press, on_release=on_release) as listener: listener.join() + # close up stream.stop_stream() stream.close() p.terminate() diff --git a/demo_server.py b/demo_server.py index 85f694834..d6c0de40a 100644 --- a/demo_server.py +++ b/demo_server.py @@ -112,6 +112,8 @@ args = parser.parse_args() class AsrTCPServer(SocketServer.TCPServer): + """The ASR TCP Server.""" + def __init__(self, server_address, RequestHandlerClass, @@ -125,8 +127,7 @@ class AsrTCPServer(SocketServer.TCPServer): class AsrRequestHandler(SocketServer.BaseRequestHandler): - """The ASR request handler. 
- """ + """The ASR request handler.""" def handle(self): # receive data through TCP socket @@ -170,6 +171,7 @@ def warm_up_test(audio_process_handler, manifest_path, num_test_cases, random_seed=0): + """Warming-up test.""" manifest = read_manifest(manifest_path) rng = random.Random(random_seed) samples = rng.sample(manifest, num_test_cases) @@ -183,12 +185,15 @@ def warm_up_test(audio_process_handler, def start_server(): + """Start the ASR server""" + # prepare data generator data_generator = DataGenerator( vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1) + # prepare ASR model ds2_model = DeepSpeech2Model( vocab_size=data_generator.vocab_size, num_conv_layers=args.num_conv_layers, @@ -196,6 +201,7 @@ def start_server(): rnn_layer_size=args.rnn_layer_size, pretrained_model_path=args.model_filepath) + # prepare ASR inference handler def file_to_transcript(filename): feature = data_generator.process_utterance(filename, "") result_transcript = ds2_model.infer_batch( @@ -210,6 +216,7 @@ def start_server(): num_processes=1) return result_transcript[0] + # warming up with utterrances sampled from Librispeech print('-----------------------------------------------------------') print('Warming up ...') warm_up_test( @@ -218,12 +225,12 @@ def start_server(): num_test_cases=3) print('-----------------------------------------------------------') + # start the server server = AsrTCPServer( server_address=(args.host_ip, args.host_port), RequestHandlerClass=AsrRequestHandler, speech_save_dir=args.speech_save_dir, audio_process_handler=file_to_transcript) - print("ASR Server Started.") server.serve_forever() From b57d244363d997c394aebbfc0f1ab49310fd1ae4 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 3 Aug 2017 12:26:33 +0800 Subject: [PATCH 108/335] Add ASR demo usage to README.md for DS2. --- README.md | 16 ++++++++++++++++ demo_client.py | 17 +++++++++++++++-- demo_server.py | 3 ++- 3 files changed, 33 insertions(+), 3 deletions(-) mode change 100644 => 100755 README.md diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 62b051714..27dc64887 --- a/README.md +++ b/README.md @@ -143,3 +143,19 @@ python tune.py --help ``` Then reset parameters with the tuning result before inference or evaluating. + +### Playing with the ASR Demo + +A real-time ASR demo (`demo_server.py` and `demo_client.py`) are prepared for users to try out the ASR model with their own voice. After a model and language model is prepared, we can first start the demo server: + +``` +CUDA_VISIBLE_DEVICES=0 python demo_server.py +``` +And then in another console, start the client: + +``` +python demo_client.py +``` +On the client console, press and hold "white-space" key and start talking, then release the "white-space" key when you finish your speech. The decoding results (infered transcription) will be displayed. + +If you would like to start server and client in two machines. Please use `--host_ip` and `--host_port` to indicate the actual IP address and port, for both `demo_server.py` and `demo_client.py`. 
diff --git a/demo_client.py b/demo_client.py index a789d8164..ddf4dd1bf 100644 --- a/demo_client.py +++ b/demo_client.py @@ -1,10 +1,23 @@ +"""Client-end for the ASR demo.""" from pynput import keyboard import struct import socket import sys +import argparse import pyaudio -HOST, PORT = "10.104.18.14", 8086 +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--host_ip", + default="localhost", + type=str, + help="Server IP address. (default: %(default)s)") +parser.add_argument( + "--host_port", + default=8086, + type=int, + help="Server Port. (default: %(default)s)") +args = parser.parse_args() is_recording = False enable_trigger_record = True @@ -42,7 +55,7 @@ def callback(in_data, frame_count, time_info, status): elif len(data_list) > 0: # Connect to server and send data sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((HOST, PORT)) + sock.connect((args.host_ip, args.host_port)) sent = ''.join(data_list) sock.sendall(struct.pack('>i', len(sent)) + sent) print('Speech[length=%d] Sent.' % len(sent)) diff --git a/demo_server.py b/demo_server.py index d6c0de40a..8a55e7265 100644 --- a/demo_server.py +++ b/demo_server.py @@ -1,3 +1,4 @@ +"""Server-end for the ASR demo.""" import os import time import random @@ -17,7 +18,7 @@ from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--host_ip", - default="10.104.18.14", + default="localhost", type=str, help="Server IP address. (default: %(default)s)") parser.add_argument( From 37b236869fbeb9340609e6917d69d916dc22d36e Mon Sep 17 00:00:00 2001 From: Yibing Liu <352748861@qq.com> Date: Thu, 3 Aug 2017 14:46:18 +0800 Subject: [PATCH 109/335] Delete swig_decoder.py --- deploy/swig_decoder.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 deploy/swig_decoder.py diff --git a/deploy/swig_decoder.py b/deploy/swig_decoder.py deleted file mode 100644 index fed23c9ef..000000000 --- a/deploy/swig_decoder.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Contains various CTC decoders in SWIG.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from swig_ctc_beam_search_decoder import ctc_beam_search_decoder as beam_search_decoder -from swig_ctc_beam_search_decoder import ctc_best_path_decoder as best_path__decoder - - -def ctc_best_path_decoder(probs_seq, vocabulary): - best_path__decoder(probs_seq.to_list(), vocabulary) - - -def ctc_beam_search_decoder( - probs_seq, - beam_size, - vocabulary, - blank_id, - cutoff_prob=1.0, - ext_scoring_func=None, ): - beam_search_decoder(probs_seq.to_list(), beam_size, vocabulary, blank_id, - cutoff_prob, ext_scoring_func) From c0b3281e58a8b0f8bc1ab5772dd1483ff5caf391 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 3 Aug 2017 19:00:17 +0800 Subject: [PATCH 110/335] Remove pynput and pyaudio packages from requriements.txt and add installation tips to README.md. --- README.md | 21 +++++++++++++++++++-- demo_server.py | 1 - requirements.txt | 2 -- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 27dc64887..39dba0cd1 100755 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ - **Python = 2.7** only supported; - **cuDNN >= 6.0** is required to utilize NVIDIA GPU platform in the installation of PaddlePaddle, and the **CUDA toolkit** with proper version suitable for cuDNN. 
The cuDNN library below 6.0 is found to yield a fatal error in batch normalization when handling utterances with long duration in inference. -### Setup +### Setup for Training & Evaluation ``` sh setup.sh @@ -16,6 +16,19 @@ export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/li Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. +### Setup for Demo + +Please do the following extra installation before run `demo_client.py` to try the realtime ASR demo. However there is no need to install them for the computer running the demo's server-end (`demo_server.py`). For details of running the ASR demo, please refer to the [section](#playing-with-the-asr-demo). + +For example, on MAC OS X: + +``` +brew install portaudio +pip install pyaudio +pip install pynput +``` + + ## Usage ### Preparing Data @@ -158,4 +171,8 @@ python demo_client.py ``` On the client console, press and hold "white-space" key and start talking, then release the "white-space" key when you finish your speech. The decoding results (infered transcription) will be displayed. -If you would like to start server and client in two machines. Please use `--host_ip` and `--host_port` to indicate the actual IP address and port, for both `demo_server.py` and `demo_client.py`. +If you would like to start the server and the client in two machines. Please use `--host_ip` and `--host_port` to indicate the actual IP address and port, for both `demo_server.py` and `demo_client.py`. + +Notice that `demo_client.py` should be started in your local computer with microphone hardware, while `demo_server.py` can be started in any remote server as well as the same local computer. IP address and port should be properly set for server-client communication. + +For running `demo_client.py`, please first finish the [extra installation steps](#setup-for-demo). diff --git a/demo_server.py b/demo_server.py index 8a55e7265..c7e7e94a4 100644 --- a/demo_server.py +++ b/demo_server.py @@ -8,7 +8,6 @@ from time import gmtime, strftime import SocketServer import struct import wave -import pyaudio import paddle.v2 as paddle from utils import print_arguments from data_utils.data import DataGenerator diff --git a/requirements.txt b/requirements.txt index 9297f659c..131f75ff4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,3 @@ resampy==0.1.5 SoundFile==0.9.0.post1 python_speech_features https://github.com/luotao1/kenlm/archive/master.zip -pyaudio -pynput From 94db28e088319bf44bb8f4f11b232e6ace9c0300 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 3 Aug 2017 21:06:27 +0800 Subject: [PATCH 111/335] Add function doc for infer_batch_loss() function in model.py for DS2. --- model.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/model.py b/model.py index c8766deb1..c2e440b3a 100644 --- a/model.py +++ b/model.py @@ -120,6 +120,16 @@ class DeepSpeech2Model(object): feeding=feeding_dict) def infer_loss_batch(self, infer_data): + """Model inference. Infer the ctc loss for a batch of speech + utterances. + + :param infer_data: List of utterances to infer, with each utterance a + tuple of audio features and transcription text (empty + string). + :type infer_data: list + :return: List of ctc loss. 
+ :rtype: List of float + """ # define inferer if self._loss_inferer == None: self._loss_inferer = paddle.inference.Inference( From f4375ef125cf496d87bb92e9991da12039488077 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 7 Aug 2017 19:41:59 +0800 Subject: [PATCH 112/335] Update README.md with code reviews for DS2. --- README.md | 36 ++++++++++++++---------------------- model.py | 6 +++--- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 39dba0cd1..96fbb7d09 100755 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ - **Python = 2.7** only supported; - **cuDNN >= 6.0** is required to utilize NVIDIA GPU platform in the installation of PaddlePaddle, and the **CUDA toolkit** with proper version suitable for cuDNN. The cuDNN library below 6.0 is found to yield a fatal error in batch normalization when handling utterances with long duration in inference. -### Setup for Training & Evaluation +### Setup ``` sh setup.sh @@ -16,19 +16,6 @@ export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/li Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. -### Setup for Demo - -Please do the following extra installation before run `demo_client.py` to try the realtime ASR demo. However there is no need to install them for the computer running the demo's server-end (`demo_server.py`). For details of running the ASR demo, please refer to the [section](#playing-with-the-asr-demo). - -For example, on MAC OS X: - -``` -brew install portaudio -pip install pyaudio -pip install pynput -``` - - ## Usage ### Preparing Data @@ -159,20 +146,25 @@ Then reset parameters with the tuning result before inference or evaluating. ### Playing with the ASR Demo -A real-time ASR demo (`demo_server.py` and `demo_client.py`) are prepared for users to try out the ASR model with their own voice. After a model and language model is prepared, we can first start the demo server: +A real-time ASR demo is built for users to try out the ASR model with their own voice. Please do the following installation on the machine you'd like to run the demo's client (no need for the machine running the demo's server). + +For example, on MAC OS X: + +``` +brew install portaudio +pip install pyaudio +pip install pynput +``` +After a model and language model is prepared, we can first start the demo's server: ``` CUDA_VISIBLE_DEVICES=0 python demo_server.py ``` -And then in another console, start the client: +And then in another console, start the demo's client: ``` python demo_client.py ``` -On the client console, press and hold "white-space" key and start talking, then release the "white-space" key when you finish your speech. The decoding results (infered transcription) will be displayed. - -If you would like to start the server and the client in two machines. Please use `--host_ip` and `--host_port` to indicate the actual IP address and port, for both `demo_server.py` and `demo_client.py`. - -Notice that `demo_client.py` should be started in your local computer with microphone hardware, while `demo_server.py` can be started in any remote server as well as the same local computer. IP address and port should be properly set for server-client communication. +On the client console, press and hold the "white-space" key on the keyboard to start talking, until you finish your speech and then release the "white-space" key. The decoding results (infered transcription) will be displayed. 
-For running `demo_client.py`, please first finish the [extra installation steps](#setup-for-demo). +It could be possible to start the server and the client in two seperate machines, e.g. `demo_client.py` is usually started in a machine with a microphone hardware, while `demo_server.py` is usually started in a remote server with powerful GPUs. Please first make sure that these two machines have network access to each other, and then use `--host_ip` and `--host_port` to indicate the server machine's actual IP address (instead of the `localhost` as default) and TCP port, in both `demo_server.py` and `demo_client.py`. diff --git a/model.py b/model.py index c2e440b3a..2eb7c3594 100644 --- a/model.py +++ b/model.py @@ -143,9 +143,9 @@ class DeepSpeech2Model(object): """Model inference. Infer the transcription for a batch of speech utterances. - :param infer_data: List of utterances to infer, with each utterance a - tuple of audio features and transcription text (empty - string). + :param infer_data: List of utterances to infer, with each utterance + consisting of a tuple of audio features and + transcription text (empty string). :type infer_data: list :param decode_method: Decoding method name, 'best_path' or 'beam search'. From 6df0f9bc4441c6fa34684923130c1115567b6b7f Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 8 Aug 2017 12:15:18 +0800 Subject: [PATCH 113/335] Reset default multi-thread/process number to half of cpu count() for speedup. --- data_utils/data.py | 2 +- evaluate.py | 4 ++-- infer.py | 2 +- train.py | 2 +- tune.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index fe064b806..34f32019c 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -65,7 +65,7 @@ class DataGenerator(object): max_freq=None, specgram_type='linear', use_dB_normalization=True, - num_threads=multiprocessing.cpu_count(), + num_threads=multiprocessing.cpu_count() // 2, random_seed=0): self._max_duration = max_duration self._min_duration = min_duration diff --git a/evaluate.py b/evaluate.py index fb7211fc2..592b7b527 100644 --- a/evaluate.py +++ b/evaluate.py @@ -45,12 +45,12 @@ parser.add_argument( help="Use gpu or not. (default: %(default)s)") parser.add_argument( "--num_threads_data", - default=multiprocessing.cpu_count(), + default=multiprocessing.cpu_count() // 2, type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( "--num_processes_beam_search", - default=multiprocessing.cpu_count(), + default=multiprocessing.cpu_count() // 2, type=int, help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( diff --git a/infer.py b/infer.py index 8fd27dce4..df5953e59 100644 --- a/infer.py +++ b/infer.py @@ -45,7 +45,7 @@ parser.add_argument( help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( "--num_processes_beam_search", - default=multiprocessing.cpu_count(), + default=multiprocessing.cpu_count() // 2, type=int, help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( diff --git a/train.py b/train.py index 080f57d2d..aff619379 100644 --- a/train.py +++ b/train.py @@ -86,7 +86,7 @@ parser.add_argument( help="Trainer number. (default: %(default)s)") parser.add_argument( "--num_threads_data", - default=multiprocessing.cpu_count(), + default=multiprocessing.cpu_count() // 2, type=int, help="Number of cpu threads for preprocessing data. 
(default: %(default)s)") parser.add_argument( diff --git a/tune.py b/tune.py index a17be30fa..328d67a11 100644 --- a/tune.py +++ b/tune.py @@ -51,7 +51,7 @@ parser.add_argument( help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( "--num_processes_beam_search", - default=multiprocessing.cpu_count(), + default=multiprocessing.cpu_count() // 2, type=int, help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( From 961f6a29630ab64696828a8746e0bdd968ab83e8 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 8 Aug 2017 15:46:03 +0800 Subject: [PATCH 114/335] Accelerate mfcc computation for DS2. --- data_utils/featurizer/audio_featurizer.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 271e535b6..00f0e8a35 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -166,21 +166,18 @@ class AudioFeaturizer(object): "window size.") # compute 13 cepstral coefficients, and the first one is replaced # by log(frame energy) - mfcc_feat = mfcc( - signal=samples, - samplerate=sample_rate, - winlen=0.001 * window_ms, - winstep=0.001 * stride_ms, - highfreq=max_freq) + mfcc_feat = np.transpose( + mfcc( + signal=samples, + samplerate=sample_rate, + winlen=0.001 * window_ms, + winstep=0.001 * stride_ms, + highfreq=max_freq)) # Deltas d_mfcc_feat = delta(mfcc_feat, 2) # Deltas-Deltas dd_mfcc_feat = delta(d_mfcc_feat, 2) # concat above three features - concat_mfcc_feat = [ - np.concatenate((mfcc_feat[i], d_mfcc_feat[i], dd_mfcc_feat[i])) - for i in xrange(len(mfcc_feat)) - ] - # transpose to be consistent with the linear specgram situation - concat_mfcc_feat = np.transpose(concat_mfcc_feat) + concat_mfcc_feat = np.concatenate( + (mfcc_feat, d_mfcc_feat, dd_mfcc_feat)) return concat_mfcc_feat From ad82c8771231d5cd78a7b14e8e1f83b034072542 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 29 Jun 2017 12:27:54 +0800 Subject: [PATCH 115/335] Add NoisePerturbAugmentor and CHiME3 data preparation. 
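As an illustrative sketch (not included in the diff below): the new augmentor slices a random window of matching duration out of a sampled CHiME3 recording and mixes it into the utterance at a signal-to-noise ratio drawn uniformly from [min_snr_dB, max_snr_dB]. The mixing itself is delegated to AudioSegment.add_noise, whose implementation is not part of this patch; the sketch only shows the standard SNR gain computation such a method would typically perform, and the helper name `mix_at_snr` is hypothetical.

```
import numpy as np


def mix_at_snr(signal, noise, snr_dB, eps=1e-20):
    """Scale `noise` so it sits `snr_dB` decibels below `signal`, then add.

    Rough sketch of standard SNR-based mixing; not necessarily identical
    to AudioSegment.add_noise. Assumes `noise` is at least as long as
    `signal`, as the augmentor requires.
    """
    signal_rms_db = 10 * np.log10(np.mean(signal ** 2) + eps)
    noise_rms_db = 10 * np.log10(np.mean(noise ** 2) + eps)
    noise_gain_db = signal_rms_db - noise_rms_db - snr_dB
    return signal + noise[:len(signal)] * (10. ** (noise_gain_db / 20.))
```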
--- data_utils/augmentor/augmentation.py | 3 + data_utils/augmentor/noise_perturb.py | 47 +++++++ .../online_bayesian_normalization.py | 0 data_utils/augmentor/resample.py | 0 datasets/noise/chime3_background.py | 128 ++++++++++++++++++ datasets/run_all.sh | 9 ++ 6 files changed, 187 insertions(+) create mode 100644 data_utils/augmentor/noise_perturb.py mode change 100755 => 100644 data_utils/augmentor/online_bayesian_normalization.py mode change 100755 => 100644 data_utils/augmentor/resample.py create mode 100644 datasets/noise/chime3_background.py diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index 9dced4731..8a50e4400 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -8,6 +8,7 @@ import random from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor +from data_utils.augmentor.noise_perturb import NoisePerturbAugmentor from data_utils.augmentor.resample import ResampleAugmentor from data_utils.augmentor.online_bayesian_normalization import \ OnlineBayesianNormalizationAugmentor @@ -89,5 +90,7 @@ class AugmentationPipeline(object): return ResampleAugmentor(self._rng, **params) elif augmentor_type == "bayesian_normal": return OnlineBayesianNormalizationAugmentor(self._rng, **params) + elif augmentor_type == "noise": + return NoisePerturbAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py new file mode 100644 index 000000000..c97ab8432 --- /dev/null +++ b/data_utils/augmentor/noise_perturb.py @@ -0,0 +1,47 @@ +"""Contains the noise perturb augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase +from data_utils import utils +from data_utils.speech import SpeechSegment + + +class NoisePerturbAugmentor(AugmentorBase): + """Augmentation model for adding background noise. + + :param rng: Random generator object. + :type rng: random.Random + :param min_snr_dB: Minimal signal noise ratio, in decibels. + :type min_snr_dB: float + :param max_snr_dB: Maximal signal noise ratio, in decibels. + :type max_snr_dB: float + """ + + def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest): + self._min_snr_dB = min_snr_dB + self._max_snr_dB = max_snr_dB + self._rng = rng + self._manifest = utils.read_manifest(manifest_path=noise_manifest) + + def transform_audio(self, audio_segment): + """Add background noise audio. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. 
+ :type audio_segment: AudioSegmenet|SpeechSegment + """ + noise_json = self._rng.sample(self._manifest, 1)[0] + if noise_json['duration'] < audio_segment.duration: + raise RuntimeError("The duration of sampled noise audio is smaller " + "than the audio segment to add effects to.") + diff_duration = noise_json['duration'] - audio_segment.duration + start = self._rng.uniform(0, diff_duration) + end = start + audio_segment.duration + noise_segment = SpeechSegment.slice_from_file( + noise_json['audio_filepath'], transcript="", start=start, end=end) + snr_dB = self._rng.uniform(self._min_snr_dB, self._max_snr_dB) + audio_segment.add_noise( + noise_segment, snr_dB, allow_downsampling=True, rng=self._rng) diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py old mode 100755 new mode 100644 diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py old mode 100755 new mode 100644 diff --git a/datasets/noise/chime3_background.py b/datasets/noise/chime3_background.py new file mode 100644 index 000000000..f79ca7335 --- /dev/null +++ b/datasets/noise/chime3_background.py @@ -0,0 +1,128 @@ +"""Prepare CHiME3 background data. + +Download, unpack and create manifest files. +Manifest file is a json-format file with each line containing the +meta data (i.e. audio filepath, transcript and audio duration) +of each audio file in the data set. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import distutils.util +import os +import wget +import zipfile +import argparse +import soundfile +import json +from paddle.v2.dataset.common import md5file + +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') + +URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ" +MD5 = "c3ff512618d7a67d4f85566ea1bc39ec" + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--target_dir", + default=DATA_HOME + "/chime3_background", + type=str, + help="Directory to save the dataset. (default: %(default)s)") +parser.add_argument( + "--manifest_filepath", + default="manifest.chime3.background", + type=str, + help="Filepath for output manifests. (default: %(default)s)") +args = parser.parse_args() + + +def download(url, md5sum, target_dir, filename=None): + """Download file from url to target_dir, and check md5sum.""" + if filename == None: + filename = url.split("/")[-1] + if not os.path.exists(target_dir): os.makedirs(target_dir) + filepath = os.path.join(target_dir, filename) + if not (os.path.exists(filepath) and md5file(filepath) == md5sum): + print("Downloading %s ..." % url) + wget.download(url, target_dir) + print("\nMD5 Chesksum %s ..." % filepath) + if not md5file(filepath) == md5sum: + raise RuntimeError("MD5 checksum failed.") + else: + print("File exists, skip downloading. (%s)" % filepath) + return filepath + + +def unpack(filepath, target_dir): + """Unpack the file to the target_dir.""" + print("Unpacking %s ..." 
% filepath) + if filepath.endswith('.zip'): + zip = zipfile.ZipFile(filepath, 'r') + zip.extractall(target_dir) + zip.close() + elif filepath.endswith('.tar') or filepath.endswith('.tar.gz'): + tar = zipfile.open(filepath) + tar.extractall(target_dir) + tar.close() + else: + raise ValueError("File format is not supported for unpacking.") + + +def create_manifest(data_dir, manifest_path): + """Create a manifest json file summarizing the data set, with each line + containing the meta data (i.e. audio filepath, transcription text, audio + duration) of each audio file within the data set. + """ + print("Creating manifest %s ..." % manifest_path) + json_lines = [] + for subfolder, _, filelist in sorted(os.walk(data_dir)): + for filename in filelist: + if filename.endswith('.wav'): + filepath = os.path.join(data_dir, subfolder, filename) + audio_data, samplerate = soundfile.read(filepath) + duration = float(len(audio_data)) / samplerate + json_lines.append( + json.dumps({ + 'audio_filepath': filepath, + 'duration': duration, + 'text': '' + })) + with open(manifest_path, 'w') as out_file: + for line in json_lines: + out_file.write(line + '\n') + + +def prepare_chime3(url, md5sum, target_dir, manifest_path): + """Download, unpack and create summmary manifest file.""" + if not os.path.exists(os.path.join(target_dir, "CHiME3")): + # download + filepath = download(url, md5sum, target_dir, + "myairbridge-AG0Y3DNBE5IWRRTV.zip") + # unpack + unpack(filepath, target_dir) + unpack( + os.path.join(target_dir, 'CHiME3_background_bus.zip'), target_dir) + unpack( + os.path.join(target_dir, 'CHiME3_background_caf.zip'), target_dir) + unpack( + os.path.join(target_dir, 'CHiME3_background_ped.zip'), target_dir) + unpack( + os.path.join(target_dir, 'CHiME3_background_str.zip'), target_dir) + else: + print("Skip downloading and unpacking. Data already exists in %s." % + target_dir) + # create manifest json file + create_manifest(target_dir, manifest_path) + + +def main(): + prepare_chime3( + url=URL, + md5sum=MD5, + target_dir=args.target_dir, + manifest_path=args.manifest_filepath) + + +if __name__ == '__main__': + main() diff --git a/datasets/run_all.sh b/datasets/run_all.sh index ef2b721fb..61747a50b 100644 --- a/datasets/run_all.sh +++ b/datasets/run_all.sh @@ -6,8 +6,17 @@ if [ $? -ne 0 ]; then fi cd - +cd noise +python chime3_background.py +if [ $? -ne 0 ]; then + echo "Prepare CHiME3 background noise failed. Terminated." + exit 1 +fi +cd - + cat librispeech/manifest.train* | shuf > manifest.train cat librispeech/manifest.dev-clean > manifest.dev cat librispeech/manifest.test-clean > manifest.test +cat noise/manifest.* > manifest.noise echo "All done." From 99e819e8eae355889c5e983abfbe50bb74e0748a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 4 Jul 2017 18:51:11 +0800 Subject: [PATCH 116/335] Add ImpulseResponseAugmentor and augmentation.config file. 
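The noise augmentor, the CHiME3 manifest script and the `manifest.noise` line in `run_all.sh` above all feed the JSON augmentation config introduced next. A rough sketch of how such a config string reaches the pipeline; the `random_seed` argument and the transform method name are assumed from the class docstring and surrounding code and may differ slightly:

```
from data_utils.augmentor.augmentation import AugmentationPipeline
from data_utils.speech import SpeechSegment

config = '''[{"type": "noise",
              "params": {"min_snr_dB": 40, "max_snr_dB": 50,
                         "noise_manifest": "datasets/manifest.noise"},
              "prob": 1.0}]'''
# random_seed is an assumed argument name; augmentation_config follows the docstring
pipeline = AugmentationPipeline(augmentation_config=config, random_seed=0)
utterance = SpeechSegment.from_file("sample.wav", "")   # placeholder wav and transcript
pipeline.transform_audio(utterance)   # mixes in a random noise clip, in place
```

With `"prob": 1.0` every utterance gets a noise clip mixed in at an SNR drawn uniformly from [40, 50] dB; with `"prob": 0.0` the augmentor is configured but never fires.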
--- augmentation.config | 34 ++++++++++++++++ data_utils/audio.py | 8 ++-- data_utils/augmentor/augmentation.py | 52 ++++++++++++++++++------ data_utils/augmentor/impulse_response.py | 34 ++++++++++++++++ data_utils/augmentor/noise_perturb.py | 8 ++-- data_utils/data.py | 2 +- data_utils/speech.py | 2 +- train.py | 4 +- 8 files changed, 120 insertions(+), 24 deletions(-) create mode 100644 augmentation.config create mode 100644 data_utils/augmentor/impulse_response.py diff --git a/augmentation.config b/augmentation.config new file mode 100644 index 000000000..9ddedd407 --- /dev/null +++ b/augmentation.config @@ -0,0 +1,34 @@ +[ + { + "type": "noise", + "params": {"min_snr_dB": 50, + "max_snr_dB": 50, + "noise_manifest": "datasets/manifest.noise"}, + "prob": 0.0 + }, + { + "type": "speed", + "params": {"min_speed_rate": 0.9, + "max_speed_rate": 1.1}, + "prob": 0.0 + }, + { + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 1.0 + }, + { + "type": "volume", + "params": {"min_gain_dBFS": -10, + "max_gain_dBFS": 10}, + "prob": 0.0 + }, + { + "type": "bayesian_normal", + "params": {"target_db": -20, + "prior_db": -20, + "prior_samples": 100}, + "prob": 0.0 + } +] diff --git a/data_utils/audio.py b/data_utils/audio.py index 3891f5b92..30e25221c 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -204,7 +204,7 @@ class AudioSegment(object): :raise ValueError: If the sample rates of the two segments are not equal, or if the lengths of segments don't match. """ - if type(self) != type(other): + if isinstance(other, type(self)): raise TypeError("Cannot add segments of different types: %s " "and %s." % (type(self), type(other))) if self._sample_rate != other._sample_rate: @@ -231,7 +231,7 @@ class AudioSegment(object): Note that this is an in-place transformation. :param gain: Gain in decibels to apply to samples. - :type gain: float + :type gain: float|1darray """ self._samples *= 10.**(gain / 20.) @@ -457,9 +457,9 @@ class AudioSegment(object): audio segments when resample is not allowed. """ if allow_resample and self.sample_rate != impulse_segment.sample_rate: - impulse_segment = impulse_segment.resample(self.sample_rate) + impulse_segment.resample(self.sample_rate) if self.sample_rate != impulse_segment.sample_rate: - raise ValueError("Impulse segment's sample rate (%d Hz) is not" + raise ValueError("Impulse segment's sample rate (%d Hz) is not " "equal to base signal sample rate (%d Hz)." % (impulse_segment.sample_rate, self.sample_rate)) samples = signal.fftconvolve(self.samples, impulse_segment.samples, diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index 8a50e4400..c9e360313 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -9,6 +9,7 @@ from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor from data_utils.augmentor.noise_perturb import NoisePerturbAugmentor +from data_utils.augmentor.impulse_response import ImpulseResponseAugmentor from data_utils.augmentor.resample import ResampleAugmentor from data_utils.augmentor.online_bayesian_normalization import \ OnlineBayesianNormalizationAugmentor @@ -24,21 +25,46 @@ class AugmentationPipeline(object): string, e.g. .. 
code-block:: - - '[{"type": "volume", - "params": {"min_gain_dBFS": -15, - "max_gain_dBFS": 15}, - "prob": 0.5}, - {"type": "speed", - "params": {"min_speed_rate": 0.8, - "max_speed_rate": 1.2}, - "prob": 0.5} - ]' + [ { + "type": "noise", + "params": {"min_snr_dB": 10, + "max_snr_dB": 20, + "noise_manifest": "datasets/manifest.noise"}, + "prob": 0.0 + }, + { + "type": "speed", + "params": {"min_speed_rate": 0.9, + "max_speed_rate": 1.1}, + "prob": 1.0 + }, + { + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 1.0 + }, + { + "type": "volume", + "params": {"min_gain_dBFS": -10, + "max_gain_dBFS": 10}, + "prob": 0.0 + }, + { + "type": "bayesian_normal", + "params": {"target_db": -20, + "prior_db": -20, + "prior_samples": 100}, + "prob": 0.0 + } + ] + This augmentation configuration inserts two augmentation models into the pipeline, with one is VolumePerturbAugmentor and the other SpeedPerturbAugmentor. "prob" indicates the probability of the current - augmentor to take effect. + augmentor to take effect. If "prob" is zero, the augmentor does not take + effect. :param augmentation_config: Augmentation configuration in json string. :type augmentation_config: str @@ -61,7 +87,7 @@ class AugmentationPipeline(object): :type audio_segment: AudioSegmenet|SpeechSegment """ for augmentor, rate in zip(self._augmentors, self._rates): - if self._rng.uniform(0., 1.) <= rate: + if self._rng.uniform(0., 1.) < rate: augmentor.transform_audio(audio_segment) def _parse_pipeline_from(self, config_json): @@ -92,5 +118,7 @@ class AugmentationPipeline(object): return OnlineBayesianNormalizationAugmentor(self._rng, **params) elif augmentor_type == "noise": return NoisePerturbAugmentor(self._rng, **params) + elif augmentor_type == "impulse": + return ImpulseResponseAugmentor(self._rng, **params) else: raise ValueError("Unknown augmentor type [%s]." % augmentor_type) diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py new file mode 100644 index 000000000..d868c3a1c --- /dev/null +++ b/data_utils/augmentor/impulse_response.py @@ -0,0 +1,34 @@ +"""Contains the impulse response augmentation model.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from data_utils.augmentor.base import AugmentorBase +from data_utils import utils +from data_utils.audio import AudioSegment + + +class ImpulseResponseAugmentor(AugmentorBase): + """Augmentation model for adding impulse response effect. + + :param rng: Random generator object. + :type rng: random.Random + :param impulse_manifest: Manifest path for impulse audio data. + :type impulse_manifest: basestring + """ + + def __init__(self, rng, impulse_manifest): + self._rng = rng + self._manifest = utils.read_manifest(manifest_path=impulse_manifest) + + def transform_audio(self, audio_segment): + """Add impulse response effect. + + Note that this is an in-place transformation. + + :param audio_segment: Audio segment to add effects to. 
+ :type audio_segment: AudioSegmenet|SpeechSegment + """ + noise_json = self._rng.sample(self._manifest, 1)[0] + noise_segment = AudioSegment.from_file(noise_json['audio_filepath']) + audio_segment.convolve(noise_segment, allow_resample=True) diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index c97ab8432..b4fa18e18 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -5,7 +5,7 @@ from __future__ import print_function from data_utils.augmentor.base import AugmentorBase from data_utils import utils -from data_utils.speech import SpeechSegment +from data_utils.audio import AudioSegment class NoisePerturbAugmentor(AugmentorBase): @@ -17,6 +17,8 @@ class NoisePerturbAugmentor(AugmentorBase): :type min_snr_dB: float :param max_snr_dB: Maximal signal noise ratio, in decibels. :type max_snr_dB: float + :param noise_manifest: Manifest path for noise audio data. + :type noise_manifest: basestring """ def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest): @@ -40,8 +42,8 @@ class NoisePerturbAugmentor(AugmentorBase): diff_duration = noise_json['duration'] - audio_segment.duration start = self._rng.uniform(0, diff_duration) end = start + audio_segment.duration - noise_segment = SpeechSegment.slice_from_file( - noise_json['audio_filepath'], transcript="", start=start, end=end) + noise_segment = AudioSegment.slice_from_file( + noise_json['audio_filepath'], start=start, end=end) snr_dB = self._rng.uniform(self._min_snr_dB, self._max_snr_dB) audio_segment.add_noise( noise_segment, snr_dB, allow_downsampling=True, rng=self._rng) diff --git a/data_utils/data.py b/data_utils/data.py index 34f32019c..159bf69d5 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -169,7 +169,7 @@ class DataGenerator(object): manifest, batch_size, clipped=True) elif shuffle_method == "instance_shuffle": self._rng.shuffle(manifest) - elif not shuffle_method: + elif shuffle_method == None: pass else: raise ValueError("Unknown shuffle method %s." % diff --git a/data_utils/speech.py b/data_utils/speech.py index 568e4443b..17d68f315 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -115,7 +115,7 @@ class SpeechSegment(AudioSegment): speech file. :rtype: SpeechSegment """ - audio = Audiosegment.slice_from_file(filepath, start, end) + audio = AudioSegment.slice_from_file(filepath, start, end) return cls(audio.samples, audio.sample_rate, transcript) @classmethod diff --git a/train.py b/train.py index aff619379..34c406015 100644 --- a/train.py +++ b/train.py @@ -123,9 +123,7 @@ parser.add_argument( help="Directory for saving models. (default: %(default)s)") parser.add_argument( "--augmentation_config", - default='[{"type": "shift", ' - '"params": {"min_shift_ms": -5, "max_shift_ms": 5},' - '"prob": 1.0}]', + default=open('augmentation.config', 'r').read(), type=str, help="Augmentation configuration in json-format. " "(default: %(default)s)") From 012261912d112d07da58dee40655975cf5cd3dd1 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 5 Jul 2017 14:18:12 +0800 Subject: [PATCH 117/335] Update default config in augmentation.config for DS2. 
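The new impulse response augmentor follows the same pattern as the noise one: sample a manifest entry, then transform the utterance in place. A standalone usage sketch, with the manifest path as a placeholder; inside the pipeline it is reached with `"type": "impulse"` and gated by `"prob"` like every other augmentor:

```
import random
from data_utils.audio import AudioSegment
from data_utils.augmentor.impulse_response import ImpulseResponseAugmentor

# "datasets/manifest.impulse" is a placeholder manifest of room impulse responses
augmentor = ImpulseResponseAugmentor(
    rng=random.Random(0), impulse_manifest="datasets/manifest.impulse")
segment = AudioSegment.from_file("sample.wav")   # placeholder utterance
augmentor.transform_audio(segment)   # convolves a random impulse response, in place
```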
--- augmentation.config | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/augmentation.config b/augmentation.config index 9ddedd407..263af5864 100644 --- a/augmentation.config +++ b/augmentation.config @@ -1,15 +1,15 @@ [ { "type": "noise", - "params": {"min_snr_dB": 50, + "params": {"min_snr_dB": 40, "max_snr_dB": 50, "noise_manifest": "datasets/manifest.noise"}, "prob": 0.0 }, { "type": "speed", - "params": {"min_speed_rate": 0.9, - "max_speed_rate": 1.1}, + "params": {"min_speed_rate": 0.95, + "max_speed_rate": 1.05}, "prob": 0.0 }, { @@ -24,11 +24,4 @@ "max_gain_dBFS": 10}, "prob": 0.0 }, - { - "type": "bayesian_normal", - "params": {"target_db": -20, - "prior_db": -20, - "prior_samples": 100}, - "prob": 0.0 - } ] From 1b787411d332bc0eec265c354562576b33112e91 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 8 Aug 2017 21:53:26 +0800 Subject: [PATCH 118/335] Update noise and impulse augmentor according to code review. --- conf/augmentation.config | 8 ++++++++ .../augmentation.config.example | 18 +++++++++++++++--- data_utils/augmentor/augmentation.py | 2 +- data_utils/augmentor/impulse_response.py | 15 ++++++++------- data_utils/augmentor/noise_perturb.py | 11 ++++++----- datasets/run_all.sh | 9 --------- datasets/run_noise.sh | 10 ++++++++++ train.py | 2 +- 8 files changed, 49 insertions(+), 26 deletions(-) create mode 100644 conf/augmentation.config rename augmentation.config => conf/augmentation.config.example (56%) create mode 100644 datasets/run_noise.sh diff --git a/conf/augmentation.config b/conf/augmentation.config new file mode 100644 index 000000000..6c24da549 --- /dev/null +++ b/conf/augmentation.config @@ -0,0 +1,8 @@ +[ + { + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 1.0 + } +] diff --git a/augmentation.config b/conf/augmentation.config.example similarity index 56% rename from augmentation.config rename to conf/augmentation.config.example index 263af5864..21ed6ee10 100644 --- a/augmentation.config +++ b/conf/augmentation.config.example @@ -3,14 +3,19 @@ "type": "noise", "params": {"min_snr_dB": 40, "max_snr_dB": 50, - "noise_manifest": "datasets/manifest.noise"}, - "prob": 0.0 + "noise_manifest_path": "datasets/manifest.noise"}, + "prob": 0.6 + }, + { + "type": "impulse", + "params": {"impulse_manifest_path": "datasets/manifest.impulse"}, + "prob": 0.5 }, { "type": "speed", "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05}, - "prob": 0.0 + "prob": 0.5 }, { "type": "shift", @@ -24,4 +29,11 @@ "max_gain_dBFS": 10}, "prob": 0.0 }, + { + "type": "bayesian_normal", + "params": {"target_db": -20, + "prior_db": -20, + "prior_samples": 100}, + "prob": 0.0 + } ] diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index c9e360313..5c30b627e 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -30,7 +30,7 @@ class AugmentationPipeline(object): "type": "noise", "params": {"min_snr_dB": 10, "max_snr_dB": 20, - "noise_manifest": "datasets/manifest.noise"}, + "noise_manifest_path": "datasets/manifest.noise"}, "prob": 0.0 }, { diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py index d868c3a1c..c3de0fdbb 100644 --- a/data_utils/augmentor/impulse_response.py +++ b/data_utils/augmentor/impulse_response.py @@ -13,13 +13,14 @@ class ImpulseResponseAugmentor(AugmentorBase): :param rng: Random generator object. 
:type rng: random.Random - :param impulse_manifest: Manifest path for impulse audio data. - :type impulse_manifest: basestring + :param impulse_manifest_path: Manifest path for impulse audio data. + :type impulse_manifest_path: basestring """ - def __init__(self, rng, impulse_manifest): + def __init__(self, rng, impulse_manifest_path): self._rng = rng - self._manifest = utils.read_manifest(manifest_path=impulse_manifest) + self._impulse_manifest = utils.read_manifest( + manifest_path=impulse_manifest_path) def transform_audio(self, audio_segment): """Add impulse response effect. @@ -29,6 +30,6 @@ class ImpulseResponseAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - noise_json = self._rng.sample(self._manifest, 1)[0] - noise_segment = AudioSegment.from_file(noise_json['audio_filepath']) - audio_segment.convolve(noise_segment, allow_resample=True) + impulse_json = self._rng.sample(self._impulse_manifest, 1)[0] + impulse_segment = AudioSegment.from_file(impulse_json['audio_filepath']) + audio_segment.convolve(impulse_segment, allow_resample=True) diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index b4fa18e18..281174af4 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -17,15 +17,16 @@ class NoisePerturbAugmentor(AugmentorBase): :type min_snr_dB: float :param max_snr_dB: Maximal signal noise ratio, in decibels. :type max_snr_dB: float - :param noise_manifest: Manifest path for noise audio data. - :type noise_manifest: basestring + :param noise_manifest_path: Manifest path for noise audio data. + :type noise_manifest_path: basestring """ - def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest): + def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path): self._min_snr_dB = min_snr_dB self._max_snr_dB = max_snr_dB self._rng = rng - self._manifest = utils.read_manifest(manifest_path=noise_manifest) + self._noise_manifest = utils.read_manifest( + manifest_path=noise_manifest_path) def transform_audio(self, audio_segment): """Add background noise audio. @@ -35,7 +36,7 @@ class NoisePerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - noise_json = self._rng.sample(self._manifest, 1)[0] + noise_json = self._rng.sample(self._noise_manifest, 1)[0] if noise_json['duration'] < audio_segment.duration: raise RuntimeError("The duration of sampled noise audio is smaller " "than the audio segment to add effects to.") diff --git a/datasets/run_all.sh b/datasets/run_all.sh index 61747a50b..ef2b721fb 100644 --- a/datasets/run_all.sh +++ b/datasets/run_all.sh @@ -6,17 +6,8 @@ if [ $? -ne 0 ]; then fi cd - -cd noise -python chime3_background.py -if [ $? -ne 0 ]; then - echo "Prepare CHiME3 background noise failed. Terminated." - exit 1 -fi -cd - - cat librispeech/manifest.train* | shuf > manifest.train cat librispeech/manifest.dev-clean > manifest.dev cat librispeech/manifest.test-clean > manifest.test -cat noise/manifest.* > manifest.noise echo "All done." diff --git a/datasets/run_noise.sh b/datasets/run_noise.sh new file mode 100644 index 000000000..7b27abde4 --- /dev/null +++ b/datasets/run_noise.sh @@ -0,0 +1,10 @@ +cd noise +python chime3_background.py +if [ $? -ne 0 ]; then + echo "Prepare CHiME3 background noise failed. Terminated." + exit 1 +fi +cd - + +cat noise/manifest.* > manifest.noise +echo "All done." 
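`run_noise.sh` leaves a `datasets/manifest.noise` file behind. A sketch of how one of its entries is consumed, mirroring the calls in `noise_perturb.py`; the wav path and the 45 dB SNR are placeholders:

```
import random
from data_utils import utils
from data_utils.audio import AudioSegment

rng = random.Random(0)
manifest = utils.read_manifest(manifest_path="datasets/manifest.noise")
entry = rng.sample(manifest, 1)[0]              # one noise clip, as a json dict
speech = AudioSegment.from_file("sample.wav")   # placeholder utterance
# the noise clip must be at least as long as the utterance
start = rng.uniform(0, entry["duration"] - speech.duration)
noise = AudioSegment.slice_from_file(
    entry["audio_filepath"], start=start, end=start + speech.duration)
speech.add_noise(noise, 45.0, allow_downsampling=True, rng=rng)
```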
diff --git a/train.py b/train.py index 34c406015..0d4e2508d 100644 --- a/train.py +++ b/train.py @@ -123,7 +123,7 @@ parser.add_argument( help="Directory for saving models. (default: %(default)s)") parser.add_argument( "--augmentation_config", - default=open('augmentation.config', 'r').read(), + default=open('conf/augmentation.config', 'r').read(), type=str, help="Augmentation configuration in json-format. " "(default: %(default)s)") From 7e39debcb03c1f5d9faa3548ec53647c379207af Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 8 Aug 2017 22:17:20 +0800 Subject: [PATCH 119/335] Convert README.md's file mode to 644. --- README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 README.md diff --git a/README.md b/README.md old mode 100755 new mode 100644 From 14d2fb795c4b8cd145d9820016ea03d3293a58ea Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 Aug 2017 11:09:26 +0800 Subject: [PATCH 120/335] Unify encoding to 'utf-8' and optimize error rate calculation. --- data_utils/data.py | 8 +-- data_utils/featurizer/text_featurizer.py | 3 +- data_utils/utils.py | 7 ++- datasets/librispeech/librispeech.py | 3 +- error_rate.py | 77 ++++++++++++++---------- tests/test_error_rate.py | 18 +++++- 6 files changed, 71 insertions(+), 45 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 159bf69d5..14b02f993 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -91,7 +91,7 @@ class DataGenerator(object): :param transcript: Transcription text. :type transcript: basestring :return: Tuple of audio feature tensor and list of token ids for - transcription. + transcription. :rtype: tuple of (2darray, list) """ speech_segment = SpeechSegment.from_file(filename, transcript) @@ -111,7 +111,7 @@ class DataGenerator(object): """ Batch data reader creator for audio data. Return a callable generator function to produce batches of data. - + Audio features within one batch will be padded with zeros to have the same shape, or a user-defined shape. @@ -191,9 +191,9 @@ class DataGenerator(object): @property def feeding(self): """Returns data reader's feeding dict. - + :return: Data feeding dict. - :rtype: dict + :rtype: dict """ return {"audio_spectrogram": 0, "transcript_text": 1} diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py index 4f9a49b59..89202163c 100644 --- a/data_utils/featurizer/text_featurizer.py +++ b/data_utils/featurizer/text_featurizer.py @@ -4,6 +4,7 @@ from __future__ import division from __future__ import print_function import os +import codecs class TextFeaturizer(object): @@ -59,7 +60,7 @@ class TextFeaturizer(object): def _load_vocabulary_from_file(self, vocab_filepath): """Load vocabulary from file.""" vocab_lines = [] - with open(vocab_filepath, 'r') as file: + with codecs.open(vocab_filepath, 'r', 'utf-8') as file: vocab_lines.extend(file.readlines()) vocab_list = [line[:-1] for line in vocab_lines] vocab_dict = dict( diff --git a/data_utils/utils.py b/data_utils/utils.py index 3f1165718..f970ff55a 100644 --- a/data_utils/utils.py +++ b/data_utils/utils.py @@ -4,15 +4,16 @@ from __future__ import division from __future__ import print_function import json +import codecs def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): """Load and parse manifest file. - + Instances with durations outside [min_duration, max_duration] will be filtered out. - :param manifest_path: Manifest file to load and parse. + :param manifest_path: Manifest file to load and parse. 
:type manifest_path: basestring :param max_duration: Maximal duration in seconds for instance filter. :type max_duration: float @@ -23,7 +24,7 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): :raises IOError: If failed to parse the manifest. """ manifest = [] - for json_line in open(manifest_path): + for json_line in codecs.open(manifest_path, 'r', 'utf-8'): try: json_data = json.loads(json_line) except Exception as e: diff --git a/datasets/librispeech/librispeech.py b/datasets/librispeech/librispeech.py index 7e941f0ea..422b1ed82 100644 --- a/datasets/librispeech/librispeech.py +++ b/datasets/librispeech/librispeech.py @@ -17,6 +17,7 @@ import argparse import soundfile import json from paddle.v2.dataset.common import md5file +import codecs DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') @@ -112,7 +113,7 @@ def create_manifest(data_dir, manifest_path): 'duration': duration, 'text': text })) - with open(manifest_path, 'w') as out_file: + with codecs.open(manifest_path, 'w', 'utf-8') as out_file: for line in json_lines: out_file.write(line + '\n') diff --git a/error_rate.py b/error_rate.py index 0cf17921c..22e5c19b7 100644 --- a/error_rate.py +++ b/error_rate.py @@ -10,47 +10,52 @@ import numpy as np def _levenshtein_distance(ref, hyp): - """Levenshtein distance is a string metric for measuring the difference between - two sequences. Informally, the levenshtein disctance is defined as the minimum - number of single-character edits (substitutions, insertions or deletions) - required to change one word into the other. We can naturally extend the edits to - word level when calculate levenshtein disctance for two sentences. + """Levenshtein distance is a string metric for measuring the difference + between two sequences. Informally, the levenshtein disctance is defined as + the minimum number of single-character edits (substitutions, insertions or + deletions) required to change one word into the other. We can naturally + extend the edits to word level when calculate levenshtein disctance for + two sentences. """ - ref_len = len(ref) - hyp_len = len(hyp) + m = len(ref) + n = len(hyp) # special case if ref == hyp: return 0 - if ref_len == 0: - return hyp_len - if hyp_len == 0: - return ref_len + if m == 0: + return n + if n == 0: + return m - distance = np.zeros((ref_len + 1, hyp_len + 1), dtype=np.int32) + if m < n: + ref, hyp = hyp, ref + m, n = n, m + + # use O(min(m, n)) space + distance = np.zeros((2, n + 1), dtype=np.int32) # initialize distance matrix - for j in xrange(hyp_len + 1): + for j in xrange(n + 1): distance[0][j] = j - for i in xrange(ref_len + 1): - distance[i][0] = i # calculate levenshtein distance - for i in xrange(1, ref_len + 1): - for j in xrange(1, hyp_len + 1): + for i in xrange(1, m + 1): + distance[i % 2][0] = i + for j in xrange(1, n + 1): if ref[i - 1] == hyp[j - 1]: - distance[i][j] = distance[i - 1][j - 1] + distance[i % 2][j] = distance[(i - 1) % 2][j - 1] else: - s_num = distance[i - 1][j - 1] + 1 - i_num = distance[i][j - 1] + 1 - d_num = distance[i - 1][j] + 1 - distance[i][j] = min(s_num, i_num, d_num) + s_num = distance[(i - 1) % 2][j - 1] + 1 + i_num = distance[i % 2][j - 1] + 1 + d_num = distance[(i - 1) % 2][j] + 1 + distance[i % 2][j] = min(s_num, i_num, d_num) - return distance[ref_len][hyp_len] + return distance[m % 2][n] def wer(reference, hypothesis, ignore_case=False, delimiter=' '): - """Calculate word error rate (WER). WER compares reference text and + """Calculate word error rate (WER). 
WER compares reference text and hypothesis text in word-level. WER is defined as: .. math:: @@ -65,8 +70,8 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): Iw is the number of words inserted, Nw is the number of words in the reference - We can use levenshtein distance to calculate WER. Please draw an attention that - empty items will be removed when splitting sentences by delimiter. + We can use levenshtein distance to calculate WER. Please draw an attention + that empty items will be removed when splitting sentences by delimiter. :param reference: The reference sentence. :type reference: basestring @@ -95,7 +100,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): return wer -def cer(reference, hypothesis, ignore_case=False): +def cer(reference, hypothesis, ignore_case=False, remove_space=False): """Calculate charactor error rate (CER). CER compares reference text and hypothesis text in char-level. CER is defined as: @@ -111,10 +116,10 @@ def cer(reference, hypothesis, ignore_case=False): Ic is the number of characters inserted Nc is the number of characters in the reference - We can use levenshtein distance to calculate CER. Chinese input should be - encoded to unicode. Please draw an attention that the leading and tailing - white space characters will be truncated and multiple consecutive white - space characters in a sentence will be replaced by one white space character. + We can use levenshtein distance to calculate CER. Chinese input should be + encoded to unicode. Please draw an attention that the leading and tailing + space characters will be truncated and multiple consecutive space + characters in a sentence will be replaced by one space character. :param reference: The reference sentence. :type reference: basestring @@ -122,6 +127,8 @@ def cer(reference, hypothesis, ignore_case=False): :type hypothesis: basestring :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool + :param remove_space: Whether remove internal space characters + :type remove_space: bool :return: Character error rate. :rtype: float :raises ValueError: If the reference length is zero. 
@@ -130,8 +137,12 @@ def cer(reference, hypothesis, ignore_case=False): reference = reference.lower() hypothesis = hypothesis.lower() - reference = ' '.join(filter(None, reference.split(' '))) - hypothesis = ' '.join(filter(None, hypothesis.split(' '))) + join_char = ' ' + if remove_space == True: + join_char = '' + + reference = join_char.join(filter(None, reference.split(' '))) + hypothesis = join_char.join(filter(None, hypothesis.split(' '))) if len(reference) == 0: raise ValueError("Length of reference should be greater than 0.") diff --git a/tests/test_error_rate.py b/tests/test_error_rate.py index be7313f35..370dd0da2 100644 --- a/tests/test_error_rate.py +++ b/tests/test_error_rate.py @@ -33,22 +33,34 @@ class TestParse(unittest.TestCase): self.assertTrue(abs(char_error_rate - 0.25) < 1e-6) def test_cer_2(self): + ref = 'werewolf' + hyp = 'weae wolf' + char_error_rate = error_rate.cer(ref, hyp, remove_space=True) + self.assertTrue(abs(char_error_rate - 0.125) < 1e-6) + + def test_cer_3(self): ref = 'werewolf' char_error_rate = error_rate.cer(ref, ref) self.assertEqual(char_error_rate, 0.0) - def test_cer_3(self): + def test_cer_4(self): ref = u'我是中国人' hyp = u'我是 美洲人' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) - def test_cer_4(self): + def test_cer_5(self): + ref = u'我 是 中 国 人' + hyp = u'我 是 美 洲 人' + char_error_rate = error_rate.cer(ref, hyp, remove_space=True) + self.assertTrue(abs(char_error_rate - 0.4) < 1e-6) + + def test_cer_6(self): ref = u'我是中国人' char_error_rate = error_rate.cer(ref, ref) self.assertFalse(char_error_rate, 0.0) - def test_cer_5(self): + def test_cer_7(self): ref = '' hyp = 'Hypothesis' with self.assertRaises(ValueError): From 04970705d6cef9538cba93c77d558790ede3c765 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 Aug 2017 14:14:04 +0800 Subject: [PATCH 121/335] Add more test cases and make DP more clear. 
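As a quick sanity check of the two metrics, worked out by hand on a toy pair (imported the same way as in tests/test_error_rate.py):

```
import error_rate

# one substituted word out of two reference words
print(error_rate.wer("hello world", "hello word"))   # 0.5
# one deleted character out of eleven reference characters
print(error_rate.cer("hello world", "hello word"))   # ~0.0909
```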
--- datasets/librispeech/librispeech.py | 2 +- error_rate.py | 14 ++++---- tests/test_error_rate.py | 56 +++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/datasets/librispeech/librispeech.py b/datasets/librispeech/librispeech.py index 422b1ed82..d963a7d53 100644 --- a/datasets/librispeech/librispeech.py +++ b/datasets/librispeech/librispeech.py @@ -16,8 +16,8 @@ import tarfile import argparse import soundfile import json -from paddle.v2.dataset.common import md5file import codecs +from paddle.v2.dataset.common import md5file DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') diff --git a/error_rate.py b/error_rate.py index 22e5c19b7..ea829f470 100644 --- a/error_rate.py +++ b/error_rate.py @@ -41,15 +41,17 @@ def _levenshtein_distance(ref, hyp): # calculate levenshtein distance for i in xrange(1, m + 1): - distance[i % 2][0] = i + prev_row_idx = (i - 1) % 2 + cur_row_idx = i % 2 + distance[cur_row_idx][0] = i for j in xrange(1, n + 1): if ref[i - 1] == hyp[j - 1]: - distance[i % 2][j] = distance[(i - 1) % 2][j - 1] + distance[cur_row_idx][j] = distance[prev_row_idx][j - 1] else: - s_num = distance[(i - 1) % 2][j - 1] + 1 - i_num = distance[i % 2][j - 1] + 1 - d_num = distance[(i - 1) % 2][j] + 1 - distance[i % 2][j] = min(s_num, i_num, d_num) + s_num = distance[prev_row_idx][j - 1] + 1 + i_num = distance[cur_row_idx][j - 1] + 1 + d_num = distance[prev_row_idx][j] + 1 + distance[cur_row_idx][j] = min(s_num, i_num, d_num) return distance[m % 2][n] diff --git a/tests/test_error_rate.py b/tests/test_error_rate.py index 370dd0da2..99e137a9a 100644 --- a/tests/test_error_rate.py +++ b/tests/test_error_rate.py @@ -11,16 +11,54 @@ import error_rate class TestParse(unittest.TestCase): def test_wer_1(self): ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' - hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night' + hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last '\ + 'night' word_error_rate = error_rate.wer(ref, hyp) self.assertTrue(abs(word_error_rate - 0.769230769231) < 1e-6) def test_wer_2(self): + ref = 'as any in england i would say said gamewell proudly that is '\ + 'in his day' + hyp = 'as any in england i would say said came well proudly that is '\ + 'in his day' + word_error_rate = error_rate.wer(ref, hyp) + self.assertTrue(abs(word_error_rate - 0.1333333) < 1e-6) + + def test_wer_3(self): + ref = 'the lieutenant governor lilburn w boggs afterward governor '\ + 'was a pronounced mormon hater and throughout the period of '\ + 'the troubles he manifested sympathy with the persecutors' + hyp = 'the lieutenant governor little bit how bags afterward '\ + 'governor was a pronounced warman hater and throughout the '\ + 'period of th troubles he manifests sympathy with the '\ + 'persecutors' + word_error_rate = error_rate.wer(ref, hyp) + self.assertTrue(abs(word_error_rate - 0.2692307692) < 1e-6) + + def test_wer_4(self): + ref = 'the wood flamed up splendidly under the large brewing copper '\ + 'and it sighed so deeply' + hyp = 'the wood flame do splendidly under the large brewing copper '\ + 'and its side so deeply' + word_error_rate = error_rate.wer(ref, hyp) + self.assertTrue(abs(word_error_rate - 0.2666666667) < 1e-6) + + def test_wer_5(self): + ref = 'all the morning they trudged up the mountain path and at noon '\ + 'unc and ojo sat on a fallen tree trunk and ate the last of '\ + 'the bread which the old munchkin had placed in his pocket' + hyp = 'all the morning they 
trudged up the mountain path and at noon '\ + 'unc in ojo sat on a fallen tree trunk and ate the last of '\ + 'the bread which the old munchkin had placed in his pocket' + word_error_rate = error_rate.wer(ref, hyp) + self.assertTrue(abs(word_error_rate - 0.027027027) < 1e-6) + + def test_wer_6(self): ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' word_error_rate = error_rate.wer(ref, ref) self.assertEqual(word_error_rate, 0.0) - def test_wer_3(self): + def test_wer_7(self): ref = ' ' hyp = 'Hypothesis sentence' with self.assertRaises(ValueError): @@ -39,28 +77,34 @@ class TestParse(unittest.TestCase): self.assertTrue(abs(char_error_rate - 0.125) < 1e-6) def test_cer_3(self): + ref = 'were wolf' + hyp = 'were wolf' + char_error_rate = error_rate.cer(ref, hyp) + self.assertTrue(abs(char_error_rate - 0.0) < 1e-6) + + def test_cer_4(self): ref = 'werewolf' char_error_rate = error_rate.cer(ref, ref) self.assertEqual(char_error_rate, 0.0) - def test_cer_4(self): + def test_cer_5(self): ref = u'我是中国人' hyp = u'我是 美洲人' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) - def test_cer_5(self): + def test_cer_6(self): ref = u'我 是 中 国 人' hyp = u'我 是 美 洲 人' char_error_rate = error_rate.cer(ref, hyp, remove_space=True) self.assertTrue(abs(char_error_rate - 0.4) < 1e-6) - def test_cer_6(self): + def test_cer_7(self): ref = u'我是中国人' char_error_rate = error_rate.cer(ref, ref) self.assertFalse(char_error_rate, 0.0) - def test_cer_7(self): + def test_cer_8(self): ref = '' hyp = 'Hypothesis' with self.assertRaises(ValueError): From 1325cd9b8ed0d2d12042cdd0aaad9a7087ded162 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 Aug 2017 16:21:44 +0800 Subject: [PATCH 122/335] Create 'tools' to hold tool scripts and add vocabulary dictionary building script. --- README.md | 6 +- tools/_init_paths.py | 16 +++++ tools/build_vocab.py | 63 +++++++++++++++++++ .../compute_mean_std.py | 1 + 4 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 tools/_init_paths.py create mode 100644 tools/build_vocab.py rename compute_mean_std.py => tools/compute_mean_std.py (99%) diff --git a/README.md b/README.md index 96fbb7d09..9d39903b5 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,13 @@ python datasets/librispeech/librispeech.py --help ### Preparing for Training ``` -python compute_mean_std.py +python tools/compute_mean_std.py ``` It will compute mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. The default feature of audio data is power spectrum, and the mfcc feature is also supported. To train and infer based on mfcc feature, please generate this file by ``` -python compute_mean_std.py --specgram_type mfcc +python tools/compute_mean_std.py --specgram_type mfcc ``` and specify ```--specgram_type mfcc``` when running train.py, infer.py, evaluator.py or tune.py. 
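The statistics file is an ordinary numpy archive, so it can be inspected directly; the names of the arrays stored inside are not spelled out here, so treat them as an assumption:

```
import numpy as np

stats = np.load("mean_std.npz")
print(stats.files)                    # names of the stored arrays
for name in stats.files:
    print(name, stats[name].shape)    # feature mean and std, one value per bin
```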
@@ -54,7 +54,7 @@ and specify ```--specgram_type mfcc``` when running train.py, infer.py, evaluato More help for arguments: ``` -python compute_mean_std.py --help +python tools/compute_mean_std.py --help ``` ### Training diff --git a/tools/_init_paths.py b/tools/_init_paths.py new file mode 100644 index 000000000..3bb2fd197 --- /dev/null +++ b/tools/_init_paths.py @@ -0,0 +1,16 @@ +"""Set up paths for DS2""" + +import os.path +import sys + + +def add_path(path): + if path not in sys.path: + sys.path.insert(0, path) + + +this_dir = os.path.dirname(__file__) + +# Add project path to PYTHONPATH +proj_path = os.path.join(this_dir, '..') +add_path(proj_path) diff --git a/tools/build_vocab.py b/tools/build_vocab.py new file mode 100644 index 000000000..59be40318 --- /dev/null +++ b/tools/build_vocab.py @@ -0,0 +1,63 @@ +"""Build vocabulary dictionary from manifest files. + +Each item in vocabulary file is a character. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import codecs +import json +from collections import Counter +import os.path + +parser = argparse.ArgumentParser( + description='Build vocabulary dictionary from transcription texts.') +parser.add_argument( + "--manifest_paths", + type=str, + help="Manifest paths for building vocabulary dictionary." + "You can provide multiple manifest files.", + nargs='+', + required=True) +parser.add_argument( + "--count_threshold", + default=0, + type=int, + help="Characters whose count below the threshold will be truncated. " + "(default: %(default)s)") +parser.add_argument( + "--vocab_path", + default='datasets/vocab/zh_vocab.txt', + type=str, + help="Filepath to write vocabularies. (default: %(default)s)") +args = parser.parse_args() + + +def count_manifest(counter, manifest_path): + for json_line in codecs.open(manifest_path, 'r', 'utf-8'): + try: + json_data = json.loads(json_line) + except Exception as e: + raise Exception('Error parsing manifest: %s, %s' % \ + (manifest_path, e)) + text = json_data['text'] + for char in text: + counter.update(char) + + +def main(): + counter = Counter() + for manifest_path in args.manifest_paths: + count_manifest(counter, manifest_path) + + count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True) + with codecs.open(args.vocab_path, 'w', 'utf-8') as fout: + for item_pair in count_sorted: + if item_pair[1] < args.count_threshold: break + fout.write(item_pair[0] + '\n') + + +if __name__ == '__main__': + main() diff --git a/compute_mean_std.py b/tools/compute_mean_std.py similarity index 99% rename from compute_mean_std.py rename to tools/compute_mean_std.py index 0cc84e730..da49eb4c0 100644 --- a/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -4,6 +4,7 @@ from __future__ import division from __future__ import print_function import argparse +import _init_paths from data_utils.normalizer import FeatureNormalizer from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.audio_featurizer import AudioFeaturizer From 98f0b6d02d8d0f51429457ae1a413926d3da2c02 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 9 Aug 2017 17:20:24 +0800 Subject: [PATCH 123/335] update the mfcc computation in DS2 --- data_utils/featurizer/audio_featurizer.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 00f0e8a35..f0d223cfb 100644 --- 
a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -159,24 +159,27 @@ class AudioFeaturizer(object): if max_freq is None: max_freq = sample_rate / 2 if max_freq > sample_rate / 2: - raise ValueError("max_freq must be greater than half of " + raise ValueError("max_freq must not be greater than half of " "sample rate.") if stride_ms > window_ms: raise ValueError("Stride size must not be greater than " "window size.") - # compute 13 cepstral coefficients, and the first one is replaced + # compute the 13 cepstral coefficients, and the first one is replaced # by log(frame energy) - mfcc_feat = np.transpose( - mfcc( - signal=samples, - samplerate=sample_rate, - winlen=0.001 * window_ms, - winstep=0.001 * stride_ms, - highfreq=max_freq)) + mfcc_feat = mfcc( + signal=samples, + samplerate=sample_rate, + winlen=0.001 * window_ms, + winstep=0.001 * stride_ms, + highfreq=max_freq) # Deltas d_mfcc_feat = delta(mfcc_feat, 2) # Deltas-Deltas dd_mfcc_feat = delta(d_mfcc_feat, 2) + # transpose + mfcc_feat = np.transpose(mfcc_feat) + d_mfcc_feat = np.transpose(d_mfcc_feat) + dd_mfcc_feat = np.transpose(dd_mfcc_feat) # concat above three features concat_mfcc_feat = np.concatenate( (mfcc_feat, d_mfcc_feat, dd_mfcc_feat)) From 5ef300f3f0538dc9a70e57e2b23ab63fb2cf4110 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 Aug 2017 19:11:00 +0800 Subject: [PATCH 124/335] Make type of error rate optional. --- evaluate.py | 26 ++++++++++++++++++++++---- infer.py | 19 ++++++++++++++++++- model.py | 2 +- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/evaluate.py b/evaluate.py index 592b7b527..7406e0bdd 100644 --- a/evaluate.py +++ b/evaluate.py @@ -10,6 +10,7 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer +from error_rate import cer import utils parser = argparse.ArgumentParser(description=__doc__) @@ -111,6 +112,14 @@ parser.add_argument( default='datasets/vocab/eng_vocab.txt', type=str, help="Vocabulary filepath. (default: %(default)s)") +parser.add_argument( + "--error_rate_type", + default='wer', + choices=['wer', 'cer'], + type=str, + help="There are total two error rate types including wer and cer. wer " + "represents for word error rate while cer for character error rate. " + "(default: %(default)s)") args = parser.parse_args() @@ -136,7 +145,14 @@ def evaluate(): rnn_layer_size=args.rnn_layer_size, pretrained_model_path=args.model_filepath) - wer_sum, num_ins = 0.0, 0 + if args.error_rate_type == 'wer': + error_rate_func = wer + error_rate_info = 'WER' + else: + error_rate_func = cer + error_rate_info = 'CER' + + error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): result_transcripts = ds2_model.infer_batch( infer_data=infer_data, @@ -153,10 +169,12 @@ def evaluate(): for _, transcript in infer_data ] for target, result in zip(target_transcripts, result_transcripts): - wer_sum += wer(target, result) + error_sum += error_rate_func(target, result) num_ins += 1 - print("WER (%d/?) = %f" % (num_ins, wer_sum / num_ins)) - print("Final WER (%d/%d) = %f" % (num_ins, num_ins, wer_sum / num_ins)) + print("%s (%d/?) 
= %f" % \ + (error_rate_info, num_ins, error_sum / num_ins)) + print("Final %s (%d/%d) = %f" % \ + (error_rate_info, num_ins, num_ins, error_sum / num_ins)) def main(): diff --git a/infer.py b/infer.py index df5953e59..3aba847e7 100644 --- a/infer.py +++ b/infer.py @@ -10,6 +10,7 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer +from error_rate import cer import utils parser = argparse.ArgumentParser(description=__doc__) @@ -111,6 +112,14 @@ parser.add_argument( type=float, help="The cutoff probability of pruning" "in beam search. (default: %(default)f)") +parser.add_argument( + "--error_rate_type", + default='wer', + choices=['wer', 'cer'], + type=str, + help="There are total two error rate types including wer and cer. wer " + "represents for word error rate while cer for character error rate. " + "(default: %(default)s)") args = parser.parse_args() @@ -147,6 +156,13 @@ def infer(): language_model_path=args.language_model_path, num_processes=args.num_processes_beam_search) + if args.error_rate_type == 'wer': + error_rate_func = wer + error_rate_info = 'wer' + else: + error_rate_func = cer + error_rate_info = 'cer' + target_transcripts = [ ''.join([data_generator.vocab_list[token] for token in transcript]) for _, transcript in infer_data @@ -154,7 +170,8 @@ def infer(): for target, result in zip(target_transcripts, result_transcripts): print("\nTarget Transcription: %s\nOutput Transcription: %s" % (target, result)) - print("Current wer = %f" % wer(target, result)) + print("Current %s = %f" % \ + (error_rate_info, error_rate_func(target, result))) def main(): diff --git a/model.py b/model.py index 2eb7c3594..e2f2903b6 100644 --- a/model.py +++ b/model.py @@ -185,7 +185,7 @@ class DeepSpeech2Model(object): # best path decode for i, probs in enumerate(probs_split): output_transcription = ctc_best_path_decoder( - probs_seq=probs, vocabulary=data_generator.vocab_list) + probs_seq=probs, vocabulary=vocab_list) results.append(output_transcription) elif decode_method == "beam_search": # initialize external scorer From 4b3f768df7d165467fbdc44e6d91fae4a1715dea Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 Aug 2017 20:03:53 +0800 Subject: [PATCH 125/335] Simplify description and codes. --- evaluate.py | 23 ++++++++--------------- infer.py | 19 ++++++------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/evaluate.py b/evaluate.py index 7406e0bdd..82dcec3c2 100644 --- a/evaluate.py +++ b/evaluate.py @@ -9,8 +9,7 @@ import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model -from error_rate import wer -from error_rate import cer +from error_rate import wer, cer import utils parser = argparse.ArgumentParser(description=__doc__) @@ -117,8 +116,8 @@ parser.add_argument( default='wer', choices=['wer', 'cer'], type=str, - help="There are total two error rate types including wer and cer. wer " - "represents for word error rate while cer for character error rate. " + help="Error rate type for evaluation. 'wer' for word error rate and 'cer' " + "for character error rate. 
" "(default: %(default)s)") args = parser.parse_args() @@ -145,13 +144,7 @@ def evaluate(): rnn_layer_size=args.rnn_layer_size, pretrained_model_path=args.model_filepath) - if args.error_rate_type == 'wer': - error_rate_func = wer - error_rate_info = 'WER' - else: - error_rate_func = cer - error_rate_info = 'CER' - + error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): result_transcripts = ds2_model.infer_batch( @@ -171,10 +164,10 @@ def evaluate(): for target, result in zip(target_transcripts, result_transcripts): error_sum += error_rate_func(target, result) num_ins += 1 - print("%s (%d/?) = %f" % \ - (error_rate_info, num_ins, error_sum / num_ins)) - print("Final %s (%d/%d) = %f" % \ - (error_rate_info, num_ins, num_ins, error_sum / num_ins)) + print("Error rate [%s] (%d/?) = %f" % + (args.error_rate_type, num_ins, error_sum / num_ins)) + print("Final error rate [%s] (%d/%d) = %f" % + (args.error_rate_type, num_ins, num_ins, error_sum / num_ins)) def main(): diff --git a/infer.py b/infer.py index 3aba847e7..43643cde7 100644 --- a/infer.py +++ b/infer.py @@ -9,8 +9,7 @@ import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model -from error_rate import wer -from error_rate import cer +from error_rate import wer, cer import utils parser = argparse.ArgumentParser(description=__doc__) @@ -117,8 +116,8 @@ parser.add_argument( default='wer', choices=['wer', 'cer'], type=str, - help="There are total two error rate types including wer and cer. wer " - "represents for word error rate while cer for character error rate. " + help="Error rate type for evaluation. 'wer' for word error rate and 'cer' " + "for character error rate. " "(default: %(default)s)") args = parser.parse_args() @@ -156,13 +155,7 @@ def infer(): language_model_path=args.language_model_path, num_processes=args.num_processes_beam_search) - if args.error_rate_type == 'wer': - error_rate_func = wer - error_rate_info = 'wer' - else: - error_rate_func = cer - error_rate_info = 'cer' - + error_rate_func = cer if args.error_rate_type == 'cer' else wer target_transcripts = [ ''.join([data_generator.vocab_list[token] for token in transcript]) for _, transcript in infer_data @@ -170,8 +163,8 @@ def infer(): for target, result in zip(target_transcripts, result_transcripts): print("\nTarget Transcription: %s\nOutput Transcription: %s" % (target, result)) - print("Current %s = %f" % \ - (error_rate_info, error_rate_func(target, result))) + print("Current error rate [%s] = %f" % + (args.error_rate_type, error_rate_func(target, result))) def main(): From c2e6378a64b1526076e4fb99aa6f9228d25891c8 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 9 Aug 2017 23:03:30 +0800 Subject: [PATCH 126/335] Simplify codes and comments. --- tools/_init_paths.py | 3 +++ tools/build_vocab.py | 32 ++++++++++++++------------------ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/tools/_init_paths.py b/tools/_init_paths.py index 3bb2fd197..ddabb535b 100644 --- a/tools/_init_paths.py +++ b/tools/_init_paths.py @@ -1,4 +1,7 @@ """Set up paths for DS2""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import os.path import sys diff --git a/tools/build_vocab.py b/tools/build_vocab.py index 59be40318..618f24985 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -1,4 +1,4 @@ -"""Build vocabulary dictionary from manifest files. 
+"""Build vocabulary from manifest files. Each item in vocabulary file is a character. """ @@ -11,13 +11,14 @@ import codecs import json from collections import Counter import os.path +import _init_paths +from data_utils import utils -parser = argparse.ArgumentParser( - description='Build vocabulary dictionary from transcription texts.') +parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--manifest_paths", type=str, - help="Manifest paths for building vocabulary dictionary." + help="Manifest paths for building vocabulary." "You can provide multiple manifest files.", nargs='+', required=True) @@ -25,25 +26,20 @@ parser.add_argument( "--count_threshold", default=0, type=int, - help="Characters whose count below the threshold will be truncated. " - "(default: %(default)s)") + help="Characters whose counts are below the threshold will be truncated. " + "(default: %(default)i)") parser.add_argument( "--vocab_path", default='datasets/vocab/zh_vocab.txt', type=str, - help="Filepath to write vocabularies. (default: %(default)s)") + help="File path to write the vocabulary. (default: %(default)s)") args = parser.parse_args() def count_manifest(counter, manifest_path): - for json_line in codecs.open(manifest_path, 'r', 'utf-8'): - try: - json_data = json.loads(json_line) - except Exception as e: - raise Exception('Error parsing manifest: %s, %s' % \ - (manifest_path, e)) - text = json_data['text'] - for char in text: + manifest_jsons = utils.read_manifest(manifest_path) + for line_json in manifest_jsons: + for char in line_json['text']: counter.update(char) @@ -54,9 +50,9 @@ def main(): count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True) with codecs.open(args.vocab_path, 'w', 'utf-8') as fout: - for item_pair in count_sorted: - if item_pair[1] < args.count_threshold: break - fout.write(item_pair[0] + '\n') + for char, count in count_sorted: + if count < args.count_threshold: break + fout.write(char + '\n') if __name__ == '__main__': From d43b33c12d18054dff102820e809ff438d2c560c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 10 Aug 2017 01:50:20 +0800 Subject: [PATCH 127/335] improve params tuning strategy for CTC beam search decoder --- tune.py | 78 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/tune.py b/tune.py index 328d67a11..5dc44a86c 100644 --- a/tune.py +++ b/tune.py @@ -15,10 +15,10 @@ import utils parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--num_samples", - default=100, + "--batch_size", + default=128, type=int, - help="Number of samples for parameters tuning. (default: %(default)s)") + help="Minibatch size for parameters tuning. (default: %(default)s)") parser.add_argument( "--num_conv_layers", default=2, @@ -51,7 +51,7 @@ parser.add_argument( help="Number of cpu threads for preprocessing data. (default: %(default)s)") parser.add_argument( "--num_processes_beam_search", - default=multiprocessing.cpu_count() // 2, + default=multiprocessing.cpu_count(), type=int, help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( @@ -130,7 +130,12 @@ args = parser.parse_args() def tune(): - """Tune parameters alpha and beta on one minibatch.""" + """Tune parameters alpha and beta for the CTC beam search decoder + incrementally. The optimal parameters up to now would be output real time + at the end of each minibatch data, until all the development data is + taken into account. 
And the tuning process can be terminated at any time + as long as the two parameters get stable. + """ if not args.num_alphas >= 0: raise ValueError("num_alphas must be non-negative!") if not args.num_betas >= 0: @@ -144,14 +149,9 @@ def tune(): num_threads=args.num_threads_data) batch_reader = data_generator.batch_reader_creator( manifest_path=args.tune_manifest_path, - batch_size=args.num_samples, + batch_size=args.batch_size, sortagrad=False, shuffle_method=None) - tune_data = batch_reader().next() - target_transcripts = [ - ''.join([data_generator.vocab_list[token] for token in transcript]) - for _, transcript in tune_data - ] ds2_model = DeepSpeech2Model( vocab_size=data_generator.vocab_size, @@ -166,24 +166,44 @@ def tune(): params_grid = [(alpha, beta) for alpha in cand_alphas for beta in cand_betas] - ## tune parameters in loop - for alpha, beta in params_grid: - result_transcripts = ds2_model.infer_batch( - infer_data=tune_data, - decode_method='beam_search', - beam_alpha=alpha, - beam_beta=beta, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, - language_model_path=args.language_model_path, - num_processes=args.num_processes_beam_search) - wer_sum, num_ins = 0.0, 0 - for target, result in zip(target_transcripts, result_transcripts): - wer_sum += wer(target, result) - num_ins += 1 - print("alpha = %f\tbeta = %f\tWER = %f" % - (alpha, beta, wer_sum / num_ins)) + wer_sum = [0.0 for i in xrange(len(params_grid))] + ave_wer = [0.0 for i in xrange(len(params_grid))] + num_ins = 0 + num_batches = 0 + ## incremental tuning parameters over multiple batches + for infer_data in batch_reader(): + target_transcripts = [ + ''.join([data_generator.vocab_list[token] for token in transcript]) + for _, transcript in infer_data + ] + + num_ins += len(target_transcripts) + # grid search + for index, (alpha, beta) in enumerate(params_grid): + result_transcripts = ds2_model.infer_batch( + infer_data=infer_data, + decode_method='beam_search', + beam_alpha=alpha, + beam_beta=beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + vocab_list=data_generator.vocab_list, + language_model_path=args.language_model_path, + num_processes=args.num_processes_beam_search) + + for target, result in zip(target_transcripts, result_transcripts): + wer_sum[index] += wer(target, result) + ave_wer[index] = wer_sum[index] / num_ins + print("alpha = %f, beta = %f, WER = %f" % + (alpha, beta, ave_wer[index])) + + # output on-line tuning result at the the end of current batch + ave_wer_min = min(ave_wer) + min_index = ave_wer.index(ave_wer_min) + print("Finish batch %d, optimal (alpha, beta, WER) = (%f, %f, %f)\n" % + (num_batches, params_grid[min_index][0], + params_grid[min_index][1], ave_wer_min)) + num_batches += 1 def main(): From b648f0c2d1161b8b520316bf137a3fd9d79b2eb1 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 10 Aug 2017 11:52:25 +0800 Subject: [PATCH 128/335] Implement uploading data in submit scripts and fix issues --- cloud/README.md | 15 +---- cloud/pcloud_submit.sh | 55 +++++++++++++++++-- cloud/pcloud_train.sh | 26 ++++----- ...pcloud_prepare_data.py => prepare_data.py} | 4 +- cloud/{pcloud_split_data.py => split_data.py} | 6 +- pcloud_train.sh | 26 ++++----- 6 files changed, 80 insertions(+), 52 deletions(-) rename cloud/{pcloud_prepare_data.py => prepare_data.py} (95%) rename cloud/{pcloud_split_data.py => split_data.py} (92%) diff --git a/cloud/README.md b/cloud/README.md index e7855ba82..7c23e0dc0 100644 --- a/cloud/README.md +++ 
b/cloud/README.md @@ -21,21 +21,8 @@ The we can get job name 'deepspeech20170727130129' at last line ``` $ paddlecloud logs -n 10000 deepspeech20170727130129 -$ ==========================deepspeech20170727130129-trainer-6vk3m========================== -label selector: paddle-job-pserver=deepspeech20170727130129, desired: 1 -running pod list: [('Running', '10.1.3.6')] -label selector: paddle-job=deepspeech20170727130129, desired: 1 -running pod list: [('Running', '10.1.83.14')] -Starting training job: /pfs/dlnel/home/****@baidu.com/jobs/deepspeech20170727130129, num_gradient_servers: 1, trainer_id: 0, version: v2 -I0727 05:01:42.969719 25 Util.cpp:166] commandline: --num_gradient_servers=1 --ports_num_for_sparse=1 --use_gpu=1 --trainer_id=0 --pservers=10.1.3.6 --trainer_count=4 --num_passes=1 --ports_num=1 --port=7164 -[INFO 2017-07-27 05:01:50,279 layers.py:2430] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968 -[WARNING 2017-07-27 05:01:50,280 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better -[INFO 2017-07-27 05:01:50,283 layers.py:2430] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848 -I0727 05:01:50.316176 25 MultiGradientMachine.cpp:99] numLogicalDevices=1 numThreads=4 numDevices=4 -I0727 05:01:50.454787 25 GradientMachine.cpp:85] Initing parameters.. -I0727 05:01:50.690007 25 GradientMachine.cpp:92] Init parameters done. ``` -[More optins and cmd aoubt paddle cloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md) +[More options and cmd about paddle cloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md) ## Run DS2 by customize data TODO diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 5d0535011..9ea5d9310 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,9 +1,54 @@ -DS2_PATH=../ -tar -czf deepspeech.tar.gz ${DS2_PATH} +# +TRAIN_MANIFEST="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev" +TEST_MANIFEST="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev" +VOCAB_PATH="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/datasets/vocab/eng_vocab.txt" +MEAN_STD_PATH="/home/work/wanghaoshuang/ds2/pcloud/models/deep_speech_2/compute_mean_std.py" +CLOUD_DATA_DIR="/pfs/dlnel/home/wanghaoshuang@baidu.com/deepspeech2/data" +CLOUD_MODEL_DIR="/pfs/dlnel/home/wanghaoshuang@baidu.com/deepspeech2/model" + +DS2_PATH=${PWD%/*} + +rm -rf ./tmp +mkdir ./tmp + +paddlecloud ls ${CLOUD_DATA_DIR}/mean_std.npz +if [ $? -ne 0 ];then + cp -f ${MEAN_STD_PATH} ./tmp/mean_std.npz + paddlecloud file put ./tmp/mean_std.npz ${CLOUD_DATA_DIR}/ +fi + +paddlecloud ls ${CLOUD_DATA_DIR}/vocab.txt +if [ $? -ne 0 ];then + cp -f ${VOCAB_PATH} ./tmp/vocab.txt + paddlecloud file put ./tmp/vocab.txt ${CLOUD_DATA_DIR}/ +fi + +paddlecloud ls ${CLOUD_DATA_DIR}/cloud.train.manifest +if [ $? -ne 0 ];then +python prepare_data.py \ +--manifest_path=${TRAIN_MANIFEST} \ +--out_tar_path="./tmp/cloud.train.tar" \ +--out_manifest_path="tmp/cloud.train.manifest" +paddlecloud file put ./tmp/cloud.train.tar ${CLOUD_DATA_DIR}/ +paddlecloud file put ./tmp/cloud.train.manifest ${CLOUD_DATA_DIR}/ +fi + +paddlecloud ls ${CLOUD_DATA_DIR}/cloud.test.manifest +if [ $? 
-ne 0 ];then +python prepare_data.py \ +--manifest_path=${TEST_MANIFEST} \ +--out_tar_path="./tmp/cloud.test.tar" \ +--out_manifest_path="tmp/cloud.test.manifest" +paddlecloud file put ./tmp/cloud.test.tar ${CLOUD_DATA_DIR}/ +paddlecloud file put ./tmp/cloud.test.manifest ${CLOUD_DATA_DIR}/ +fi + +rm -rf ./tmp + JOB_NAME=deepspeech`date +%Y%m%d%H%M%S` cp pcloud_train.sh ${DS2_PATH} paddlecloud submit \ --image wanghaoshuang/pcloud_ds2:latest-gpu-cudnn \ +-image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest-gpu-cudnn \ -jobname ${JOB_NAME} \ -cpu 4 \ -gpu 4 \ @@ -13,5 +58,5 @@ paddlecloud submit \ -pservers 1 \ -psmemory 10Gi \ -passes 1 \ --entry "sh pcloud_train.sh" \ -. +-entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEl_DIR}" \ +${DS2_PATH} diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index 385281cef..ebf73bbb7 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -1,37 +1,35 @@ -DATA_PATH=/pfs/dlnel/public/dataset/speech/libri +DATA_PATH=$1 +MODEL_PATH=$2 #setted by user -TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train +TRAIN_MANI=${DATA_PATH}/cloud.train.manifest #setted by user -DEV_MANI=${DATA_PATH}/manifest_pcloud.dev +DEV_MANI=${DATA_PATH}/cloud.test.manifest #setted by user -TRAIN_TAR=${DATA_PATH}/data.train.tar +TRAIN_TAR=${DATA_PATH}/cloud.train.tar #setted by user -DEV_TAR=${DATA_PATH}/data.dev.tar +DEV_TAR=${DATA_PATH}/cloud.test.tar #setted by user VOCAB_PATH=${DATA_PATH}/eng_vocab.txt #setted by user MEAN_STD_FILE=${DATA_PATH}/mean_std.npz -tar -xzf deepspeech.tar.gz -rm -rf ./cloud/data/* - # split train data for each pcloud node -python ./cloud/pcloud_split_data.py \ +python ./cloud/split_data.py \ --in_manifest_path=$TRAIN_MANI \ --data_tar_path=$TRAIN_TAR \ ---out_manifest_path='./cloud/data/train.mani' +--out_manifest_path='./local.train.manifest' # split dev data for each pcloud node -python pcloud_split_data.py \ +python ./cloud/split_data.py \ --in_manifest_path=$DEV_MANI \ --data_tar_path=$DEV_TAR \ ---out_manifest_path='./cloud/data/dev.mani' +--out_manifest_path='./local.test.manifest' python train.py \ --use_gpu=1 \ --trainer_count=4 \ --batch_size=256 \ --mean_std_filepath=$MEAN_STD_FILE \ ---train_manifest_path='./cloud/data/train.mani' \ ---dev_manifest_path='./cloud/data/dev.mani' \ +--train_manifest_path='./local.train.manifest' \ +--dev_manifest_path='./local.test.manifest' \ --vocab_filepath=$VOCAB_PATH \ diff --git a/cloud/pcloud_prepare_data.py b/cloud/prepare_data.py similarity index 95% rename from cloud/pcloud_prepare_data.py rename to cloud/prepare_data.py index 2ffdaf630..dc1e2d279 100644 --- a/cloud/pcloud_prepare_data.py +++ b/cloud/prepare_data.py @@ -25,12 +25,12 @@ parser.add_argument( help="Manifest of target data. (default: %(default)s)") parser.add_argument( "--out_tar_path", - default="./data/dev.tar", + default="./tmp/cloud.train.tar", type=str, help="Output tar file path. (default: %(default)s)") parser.add_argument( "--out_manifest_path", - default="./data/dev.mani", + default="./tmp/cloud.train.manifest", type=str, help="Manifest of output data. 
(default: %(default)s)") args = parser.parse_args() diff --git a/cloud/pcloud_split_data.py b/cloud/split_data.py similarity index 92% rename from cloud/pcloud_split_data.py rename to cloud/split_data.py index 8f98799aa..78bf31742 100644 --- a/cloud/pcloud_split_data.py +++ b/cloud/split_data.py @@ -11,17 +11,17 @@ import argparse parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--in_manifest_path", - default='./cloud/data/dev.mani', + default='./cloud.train.manifest', type=str, help="Input manifest path. (default: %(default)s)") parser.add_argument( "--data_tar_path", - default='./cloud/data/dev.tar', + default='./cloud.train.tar', type=str, help="Data tar file path. (default: %(default)s)") parser.add_argument( "--out_manifest_path", - default='./cloud/data/dev.mani.split', + default='./local.train.manifest', type=str, help="Out manifest file path. (default: %(default)s)") args = parser.parse_args() diff --git a/pcloud_train.sh b/pcloud_train.sh index b13e23e95..ebf73bbb7 100644 --- a/pcloud_train.sh +++ b/pcloud_train.sh @@ -1,37 +1,35 @@ -DATA_PATH=/pfs/dlnel/public/dataset/speech/libri +DATA_PATH=$1 +MODEL_PATH=$2 #setted by user -TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train +TRAIN_MANI=${DATA_PATH}/cloud.train.manifest #setted by user -DEV_MANI=${DATA_PATH}/manifest_pcloud.dev +DEV_MANI=${DATA_PATH}/cloud.test.manifest #setted by user -TRAIN_TAR=${DATA_PATH}/data.train.tar +TRAIN_TAR=${DATA_PATH}/cloud.train.tar #setted by user -DEV_TAR=${DATA_PATH}/data.dev.tar +DEV_TAR=${DATA_PATH}/cloud.test.tar #setted by user VOCAB_PATH=${DATA_PATH}/eng_vocab.txt #setted by user MEAN_STD_FILE=${DATA_PATH}/mean_std.npz -tar -xzvf deepspeech.tar.gz -rm -rf ./cloud/data/* - # split train data for each pcloud node -python ./cloud/pcloud_split_data.py \ +python ./cloud/split_data.py \ --in_manifest_path=$TRAIN_MANI \ --data_tar_path=$TRAIN_TAR \ ---out_manifest_path='./cloud/data/train.mani' +--out_manifest_path='./local.train.manifest' # split dev data for each pcloud node -python pcloud_split_data.py \ +python ./cloud/split_data.py \ --in_manifest_path=$DEV_MANI \ --data_tar_path=$DEV_TAR \ ---out_manifest_path='./cloud/data/dev.mani' +--out_manifest_path='./local.test.manifest' python train.py \ --use_gpu=1 \ --trainer_count=4 \ --batch_size=256 \ --mean_std_filepath=$MEAN_STD_FILE \ ---train_manifest_path='./cloud/data/train.mani' \ ---dev_manifest_path='./cloud/data/dev.mani' \ +--train_manifest_path='./local.train.manifest' \ +--dev_manifest_path='./local.test.manifest' \ --vocab_filepath=$VOCAB_PATH \ From b57dc63e1f13ff0c3912842d03e1dfdff39ae28c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 10 Aug 2017 11:55:03 +0800 Subject: [PATCH 129/335] update readme in DS2 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d39903b5..2cc71305a 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ### Prerequisites - **Python = 2.7** only supported; - - **cuDNN >= 6.0** is required to utilize NVIDIA GPU platform in the installation of PaddlePaddle, and the **CUDA toolkit** with proper version suitable for cuDNN. The cuDNN library below 6.0 is found to yield a fatal error in batch normalization when handling utterances with long duration in inference. + - **cuDNN** library is required to utilize NVIDIA GPU platform sufficiently in the installation of PaddlePaddle, and the **CUDA toolkit** with proper version suitable for cuDNN. 
### Setup From 9e08727c950a1df8d8b0775329bb17e9a3435b2f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 10 Aug 2017 12:11:47 +0800 Subject: [PATCH 130/335] remove prerequisites part in the readme of DS2 --- README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/README.md b/README.md index 2cc71305a..9c2a0872b 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,6 @@ ## Installation -### Prerequisites - - - **Python = 2.7** only supported; - - **cuDNN** library is required to utilize NVIDIA GPU platform sufficiently in the installation of PaddlePaddle, and the **CUDA toolkit** with proper version suitable for cuDNN. - -### Setup - ``` sh setup.sh export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib:$LD_LIBRARY_PATH From 08a6d07811805c8b930f6a200c648bb535aec6f5 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 11 Aug 2017 14:33:09 +0800 Subject: [PATCH 131/335] remove binary files --- data_utils/__init__.pyc | Bin 166 -> 0 bytes data_utils/audio.pyc | Bin 26476 -> 0 bytes data_utils/augmentor/__init__.pyc | Bin 176 -> 0 bytes data_utils/augmentor/augmentation.pyc | Bin 5774 -> 0 bytes data_utils/augmentor/base.pyc | Bin 1811 -> 0 bytes data_utils/augmentor/impulse_response.pyc | Bin 2135 -> 0 bytes data_utils/augmentor/noise_perturb.pyc | Bin 2738 -> 0 bytes .../augmentor/online_bayesian_normalization.pyc | Bin 2451 -> 0 bytes data_utils/augmentor/resample.pyc | Bin 1754 -> 0 bytes data_utils/augmentor/shift_perturb.pyc | Bin 1893 -> 0 bytes data_utils/augmentor/speed_perturb.pyc | Bin 2460 -> 0 bytes data_utils/augmentor/volume_perturb.pyc | Bin 2023 -> 0 bytes data_utils/data.pyc | Bin 14736 -> 0 bytes data_utils/featurizer/__init__.pyc | Bin 177 -> 0 bytes data_utils/featurizer/audio_featurizer.pyc | Bin 7371 -> 0 bytes data_utils/featurizer/speech_featurizer.pyc | Bin 4608 -> 0 bytes data_utils/featurizer/text_featurizer.pyc | Bin 3453 -> 0 bytes data_utils/normalizer.pyc | Bin 4422 -> 0 bytes data_utils/speech.pyc | Bin 6754 -> 0 bytes data_utils/utils.pyc | Bin 1489 -> 0 bytes 20 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 data_utils/__init__.pyc delete mode 100644 data_utils/audio.pyc delete mode 100644 data_utils/augmentor/__init__.pyc delete mode 100644 data_utils/augmentor/augmentation.pyc delete mode 100644 data_utils/augmentor/base.pyc delete mode 100644 data_utils/augmentor/impulse_response.pyc delete mode 100644 data_utils/augmentor/noise_perturb.pyc delete mode 100644 data_utils/augmentor/online_bayesian_normalization.pyc delete mode 100644 data_utils/augmentor/resample.pyc delete mode 100644 data_utils/augmentor/shift_perturb.pyc delete mode 100644 data_utils/augmentor/speed_perturb.pyc delete mode 100644 data_utils/augmentor/volume_perturb.pyc delete mode 100644 data_utils/data.pyc delete mode 100644 data_utils/featurizer/__init__.pyc delete mode 100644 data_utils/featurizer/audio_featurizer.pyc delete mode 100644 data_utils/featurizer/speech_featurizer.pyc delete mode 100644 data_utils/featurizer/text_featurizer.pyc delete mode 100644 data_utils/normalizer.pyc delete mode 100644 data_utils/speech.pyc delete mode 100644 data_utils/utils.pyc
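Patch 131 above only deletes compiled `*.pyc` byte-code files that had been checked into the repository by mistake; no source files change. As a rough, hedged sketch only (these commands are not part of the patch series, and the `.gitignore` entry is an assumption rather than something the patch adds), the same cleanup can be reproduced in a working copy with:

```
# Hypothetical cleanup, not taken from this repository's history:
# untrack every committed *.pyc file (the files stay on disk)
find . -name '*.pyc' -print0 | xargs -0 git rm --cached --ignore-unmatch
# keep byte-code files out of future commits (assumes a .gitignore is wanted)
echo '*.pyc' >> .gitignore
git add .gitignore
git commit -m "remove binary files"
```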
From e9baaa861329b2e8763d37baf3653ba24f56cea0 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 11 Aug 2017 18:55:43 +0800 Subject: [PATCH 132/335] Fix some syntax errors.
--- cloud/README.md | 30 ++++++++-------- cloud/pcloud_submit.sh | 7 +++- cloud/pcloud_train.sh | 2 -- cloud/split_data.py | 6 ++-- cloud/upload_data.py | 82 +++++++++++++++++++++--------------------- data_utils/data.py | 6 ++-- 6 files changed, 68 insertions(+), 65 deletions(-) diff --git a/cloud/README.md b/cloud/README.md index 392088cf9..8e7e49f9e 100644 --- a/cloud/README.md +++ b/cloud/README.md @@ -1,12 +1,13 @@ # Run DS2 on PaddleCloud ->Note: Make sure current directory is `models/deep_speech_2/cloud/` +>Note: +>Make sure [PaddleCloud client](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud) has be installed and current directory is `models/deep_speech_2/cloud/` -## Step1 Configure data set +## Step-1 Configure data set -You can configure your input data and output path in pcloud_submit.sh: +Configure your input data and output path in pcloud_submit.sh: -- `TRAIN_MANIFEST`: Absolute path of train data manifest file in local file system.This file has format as bellow: +- `TRAIN_MANIFEST`: Absolute path of train data manifest file in local file system.This file has format as bellow: ``` {"audio_filepath": "/home/disk1/LibriSpeech/dev-clean/1272/128104/1272-128104-0000.flac", "duration": 5.855, "text @@ -15,19 +16,18 @@ You can configure your input data and output path in pcloud_submit.sh: ": "nor is mister ..."} ``` -- `TEST_MANIFEST`: Absolute path of train data manifest file in local filesystem.This file has format like TRAIN_MANIFEST. - +- `TEST_MANIFEST`: Absolute path of train data manifest file in local filesystem. This file has format like `TRAIN_MANIFEST`. - `VOCAB_FILE`: Absolute path of vocabulary file in local filesytem. -- `MEAN_STD_FILE`: Absolute path of vocabulary file in local filesytem. +- `MEAN_STD_FILE`: Absolute path of normalizer's statistic file in local filesytem. - `CLOUD_DATA_DIR:` Absolute path in PaddleCloud filesystem. We will upload local train data to this directory. - `CLOUD_MODEL_DIR`: Absolute path in PaddleCloud filesystem. PaddleCloud trainer will save model to this directory. +>Note: Upload will be skipped if target file has existed in `CLOUD_DATA_DIR`. ->Note: Upload will be skipped if target file has existed in ${CLOUD_DATA_DIR}. +## Step-2 Configure computation resource -## Step2 Configure computation resource +Configure computation resource in pcloud_submit.sh: -You can configure computation resource in pcloud_submit.sh: ``` # Configure computation resource and submit job to PaddleCloud paddlecloud submit \ @@ -44,10 +44,10 @@ You can configure computation resource in pcloud_submit.sh: -entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEL_DIR}" \ ${DS2_PATH} ``` -For more information, please refer to[PaddleCloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务) +For more information, please refer to [PaddleCloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务) -## Step3 Configure algorithm options -You can configure algorithm options in pcloud_train.sh: +## Step-3 Configure algorithm options +Configure algorithm options in pcloud_train.sh: ``` python train.py \ --use_gpu=1 \ @@ -65,13 +65,13 @@ cd .. 
python train.py --help ``` -## Step4 Submit job +## Step-4 Submit job ``` $ sh pcloud_submit.sh ``` -## Step5 Get logs +## Step-5 Get logs ``` $ paddlecloud logs -n 10000 deepspeech20170727130129 ``` diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 179d144f4..5ecb011bc 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -15,6 +15,11 @@ python upload_data.py \ --vocab_file=${VOCAB_FILE} \ --mean_std_file=${MEAN_STD_FILE} \ --cloud_data_path=${CLOUD_DATA_DIR} +if [ $? -ne 0 ] +then + echo "upload data failed!" + exit 1 +fi JOB_NAME=deepspeech`date +%Y%m%d%H%M%S` DS2_PATH=${PWD%/*} @@ -27,7 +32,7 @@ paddlecloud submit \ -cpu 4 \ -gpu 4 \ -memory 10Gi \ --parallelism 1 \ +-parallelism 2 \ -pscpu 1 \ -pservers 1 \ -psmemory 10Gi \ diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index 64a0fac3b..ce1843678 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -22,8 +22,6 @@ python ./cloud/split_data.py \ python train.py \ --use_gpu=1 \ --trainer_count=4 \ ---batch_size=32 \ ---num_threads_data=4 \ --mean_std_filepath=$MEAN_STD_FILE \ --train_manifest_path='./local.train.manifest' \ --dev_manifest_path='./local.test.manifest' \ diff --git a/cloud/split_data.py b/cloud/split_data.py index 78bf31742..6b0754a80 100644 --- a/cloud/split_data.py +++ b/cloud/split_data.py @@ -1,9 +1,11 @@ -""" -This tool is used for splitting data into each node of +"""This tool is used for splitting data into each node of paddle cloud by total trainer count and current trainer id. The meaning of trainer is a instance of k8s cluster. This script should be called in paddle cloud. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import os import json import argparse diff --git a/cloud/upload_data.py b/cloud/upload_data.py index 75dcf010e..3336f722b 100644 --- a/cloud/upload_data.py +++ b/cloud/upload_data.py @@ -1,5 +1,4 @@ -""" -This tool is used for preparing data for DeepSpeech2 trainning on paddle cloud. +"""This tool is used for preparing data for DeepSpeech2 trainning on paddle cloud. Steps: 1. Read original manifest and get the local path of sound files. @@ -9,6 +8,9 @@ Steps: Finally, we will get a tar file and a manifest with sound file name, duration and text. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import json import os import tarfile @@ -50,7 +52,6 @@ parser.add_argument( parser.add_argument( "--cloud_data_path", required=True, - default="", type=str, help="Destination path on paddlecloud. (default: %(default)s)") args = parser.parse_args() @@ -64,8 +65,7 @@ args = parser.parse_args() def pack_data(manifest_path, out_tar_path, out_manifest_path): - ''' - 1. According manifest, tar sound files into out_tar_path + '''1. According to the manifest, tar sound files into out_tar_path 2. Generate a new manifest for output tar file ''' out_tar = tarfile.open(out_tar_path, 'w') @@ -83,65 +83,65 @@ def pack_data(manifest_path, out_tar_path, out_manifest_path): out_tar.close() +def pcloud_cp(src, dst): + """Copy src from local filesytem to dst in PaddleCloud filesystem. + """ + ret = call(['paddlecloud', 'cp', src, dst]) + return ret + + +def pcloud_exist(path): + """Check if file or directory exists in PaddleCloud filesystem. 
+ """ + ret = call(['paddlecloud', 'ls', path]) + return ret + + if __name__ == '__main__': - cloud_train_manifest = "%s/%s" % (args.cloud_data_path, TRAIN_MANIFEST) - cloud_train_tar = "%s/%s" % (args.cloud_data_path, TRAIN_TAR) - cloud_test_manifest = "%s/%s" % (args.cloud_data_path, TEST_MANIFEST) - cloud_test_tar = "%s/%s" % (args.cloud_data_path, TEST_TAR) - cloud_vocab_file = "%s/%s" % (args.cloud_data_path, VOCAB_FILE) - cloud_mean_file = "%s/%s" % (args.cloud_data_path, MEAN_STD_FILE) - - local_train_manifest = "%s/%s" % (args.local_tmp_path, TRAIN_MANIFEST) - local_train_tar = "%s/%s" % (args.local_tmp_path, TRAIN_TAR) - local_test_manifest = "%s/%s" % (args.local_tmp_path, TEST_MANIFEST) - local_test_tar = "%s/%s" % (args.local_tmp_path, TEST_TAR) + cloud_train_manifest = os.path.join(args.cloud_data_path, TRAIN_MANIFEST) + cloud_train_tar = os.path.join(args.cloud_data_path, TRAIN_TAR) + cloud_test_manifest = os.path.join(args.cloud_data_path, TEST_MANIFEST) + cloud_test_tar = os.path.join(args.cloud_data_path, TEST_TAR) + cloud_vocab_file = os.path.join(args.cloud_data_path, VOCAB_FILE) + cloud_mean_file = os.path.join(args.cloud_data_path, MEAN_STD_FILE) + + local_train_manifest = os.path.join(args.local_tmp_path, TRAIN_MANIFEST) + local_train_tar = os.path.join(args.local_tmp_path, TRAIN_TAR) + local_test_manifest = os.path.join(args.local_tmp_path, TEST_MANIFEST) + local_test_tar = os.path.join(args.local_tmp_path, TEST_TAR) if os.path.exists(args.local_tmp_path): shutil.rmtree(args.local_tmp_path) os.makedirs(args.local_tmp_path) - ret = 1 # train data if args.train_manifest_path != "": - ret = call(['paddlecloud', 'ls', cloud_train_manifest]) + ret = pcloud_exist(cloud_train_manifest) if ret != 0: - print "%s does't exist" % cloud_train_manifest pack_data(args.train_manifest_path, local_train_tar, local_train_manifest) - call([ - 'paddlecloud', 'cp', local_train_manifest, cloud_train_manifest - ]) - call(['paddlecloud', 'cp', local_train_tar, cloud_train_tar]) + pcloud_cp(local_train_manifest, cloud_train_manifest) + pcloud_cp(local_train_tar, cloud_train_tar) # test data if args.test_manifest_path != "": - try: - ret = call(['paddlecloud', 'ls', cloud_test_manifest]) - except Exception: - ret = 1 + ret = pcloud_exist(cloud_test_manifest) if ret != 0: pack_data(args.test_manifest_path, local_test_tar, local_test_manifest) - call( - ['paddlecloud', 'cp', local_test_manifest, cloud_test_manifest]) - call(['paddlecloud', 'cp', local_test_tar, cloud_test_tar]) + pcloud_cp(local_test_manifest, cloud_test_manifest) + pcloud_cp(local_test_tar, cloud_test_tar) # vocab file if args.vocab_file != "": - try: - ret = call(['paddlecloud', 'ls', cloud_vocab_file]) - except Exception: - ret = 1 + ret = pcloud_exist(cloud_vocab_file) if ret != 0: - call(['paddlecloud', 'cp', args.vocab_file, cloud_vocab_file]) + pcloud_cp(args.vocab_file, cloud_vocab_file) # mean_std file if args.mean_std_file != "": - try: - ret = call(['paddlecloud', 'ls', cloud_mean_file]) - except Exception: - ret = 1 + ret = pcloud_exist(cloud_mean_file) if ret != 0: - call(['paddlecloud', 'cp', args.mean_std_file, cloud_mean_file]) + pcloud_cp(args.mean_std_file, cloud_mean_file) - os.removedirs(args.local_tmp_path) + shutil.rmtree(args.local_tmp_path) diff --git a/data_utils/data.py b/data_utils/data.py index 1e524b0f1..f404b4fa7 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -89,9 +89,6 @@ class DataGenerator(object): self._num_threads = num_threads self._rng = random.Random(random_seed) self._epoch = 
0 - # for caching tar files info - self.tar2info = {} - self.tar2object = {} def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. @@ -237,6 +234,7 @@ class DataGenerator(object): def _get_file_object(self, file): """Get file object by file path. + If file startwith tar, it will return a tar file object and cached tar file info for next reading request. It will return file directly, if the type of file is not str. @@ -254,7 +252,7 @@ class DataGenerator(object): return local_data.tar2object[tarpath].extractfile( local_data.tar2info[tarpath][filename]) else: - return open(file) + return open(file, 'r') def _instance_reader_creator(self, manifest): """ From b41768125112714413f24274a5b1974d408d6738 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Fri, 11 Aug 2017 18:59:53 +0800 Subject: [PATCH 133/335] Change the default values in pcloud_train to those listed in train.py. --- cloud/pcloud_train.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index ce1843678..b9a50360a 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -21,7 +21,6 @@ python ./cloud/split_data.py \ python train.py \ --use_gpu=1 \ ---trainer_count=4 \ --mean_std_filepath=$MEAN_STD_FILE \ --train_manifest_path='./local.train.manifest' \ --dev_manifest_path='./local.test.manifest' \ From 19824a8d9850371880ec01fd2698dc67299e6d96 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Mon, 14 Aug 2017 19:28:38 +0800 Subject: [PATCH 134/335] Move local data from global into class DataGenerator. --- data_utils/data.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index f404b4fa7..98180b4be 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -17,11 +17,6 @@ from data_utils.featurizer.speech_featurizer import SpeechFeaturizer from data_utils.speech import SpeechSegment from data_utils.normalizer import FeatureNormalizer -# for caching tar files info -local_data = local() -local_data.tar2info = {} -local_data.tar2object = {} - class DataGenerator(object): """ @@ -89,6 +84,10 @@ class DataGenerator(object): self._num_threads = num_threads self._rng = random.Random(random_seed) self._epoch = 0 + # for caching tar files info + self.local_data = local() + self.local_data.tar2info = {} + self.local_data.tar2object = {} def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. 
@@ -241,16 +240,16 @@ class DataGenerator(object): """ if file.startswith('tar:'): tarpath, filename = file.split(':', 1)[1].split('#', 1) - if 'tar2info' not in local_data.__dict__: - local_data.tar2info = {} - if 'tar2object' not in local_data.__dict__: - local_data.tar2object = {} - if tarpath not in local_data.tar2info: + if 'tar2info' not in self.local_data.__dict__: + self.local_data.tar2info = {} + if 'tar2object' not in self.local_data.__dict__: + self.local_data.tar2object = {} + if tarpath not in self.local_data.tar2info: object, infoes = self._parse_tar(tarpath) - local_data.tar2info[tarpath] = infoes - local_data.tar2object[tarpath] = object - return local_data.tar2object[tarpath].extractfile( - local_data.tar2info[tarpath][filename]) + self.local_data.tar2info[tarpath] = infoes + self.local_data.tar2object[tarpath] = object + return self.local_data.tar2object[tarpath].extractfile( + self.local_data.tar2info[tarpath][filename]) else: return open(file, 'r') From 0e79ee37a4aa9c706a678ac3f4fcca333bf5795a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 14 Aug 2017 19:47:22 +0800 Subject: [PATCH 135/335] Bug fix and refine cloud training for DS2. Summary: 1. Add missing is_local argument (when set False, use pserver). 2. Add exception thrown if cp failed. 3. Add cloud mkdir if not cloud path for uploading does not exist. 4. Fix a bug using common path ./local_manifest for all nodes. (convert to /local_manifest) 5. Refine coding style. --- cloud/_init_paths.py | 17 +++++++ cloud/pcloud_submit.sh | 37 ++++++++------ cloud/pcloud_train.sh | 32 +++++++----- cloud/split_data.py | 1 + cloud/upload_data.py | 107 ++++++++++++++++++++--------------------- train.py | 11 ++++- 6 files changed, 120 insertions(+), 85 deletions(-) create mode 100644 cloud/_init_paths.py diff --git a/cloud/_init_paths.py b/cloud/_init_paths.py new file mode 100644 index 000000000..3305d7488 --- /dev/null +++ b/cloud/_init_paths.py @@ -0,0 +1,17 @@ +"""Set up paths for DS2""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path +import sys + + +def add_path(path): + if path not in sys.path: + sys.path.insert(0, path) + + +this_dir = os.path.dirname(__file__) +proj_path = os.path.join(this_dir, '..') +add_path(proj_path) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 5ecb011bc..78172c1a5 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,17 +1,22 @@ # Configure input data set in local filesystem -TRAIN_MANIFEST="/home/work/demo/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev" -TEST_MANIFEST="/home/work/demo/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev" -VOCAB_FILE="/home/work/demo/ds2/pcloud/models/deep_speech_2/datasets/vocab/eng_vocab.txt" -MEAN_STD_FILE="/home/work/demo/ds2/pcloud/models/deep_speech_2/mean_std.npz" - +TRAIN_MANIFEST="../datasets/manifest.dev" +DEV_MANIFEST="../datasets/manifest.dev" +VOCAB_FILE="../datasets/vocab/eng_vocab.txt" +MEAN_STD_FILE="../mean_std.npz" # Configure output path in PaddleCloud filesystem -CLOUD_DATA_DIR="/pfs/dlnel/home/demo/deepspeech2/data" -CLOUD_MODEL_DIR="/pfs/dlnel/home/demo/deepspeech2/model" +CLOUD_DATA_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/data" +CLOUD_MODEL_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/model" +# Configure cloud resources +NUM_CPU=12 +NUM_GPU=4 +NUM_NODE=2 +MEMORY="10Gi" +IS_LOCAL="False" # Pack and upload local data to PaddleCloud filesystem python upload_data.py \ 
--train_manifest_path=${TRAIN_MANIFEST} \ ---test_manifest_path=${TEST_MANIFEST} \ +--dev_manifest_path=${DEV_MANIFEST} \ --vocab_file=${VOCAB_FILE} \ --mean_std_file=${MEAN_STD_FILE} \ --cloud_data_path=${CLOUD_DATA_DIR} @@ -21,23 +26,23 @@ then exit 1 fi -JOB_NAME=deepspeech`date +%Y%m%d%H%M%S` +# Submit job to PaddleCloud +JOB_NAME=deepspeech-`date +%Y%m%d%H%M%S` DS2_PATH=${PWD%/*} cp -f pcloud_train.sh ${DS2_PATH} -# Configure computation resource and submit job to PaddleCloud paddlecloud submit \ -image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest \ -jobname ${JOB_NAME} \ --cpu 4 \ --gpu 4 \ --memory 10Gi \ --parallelism 2 \ +-cpu ${NUM_CPU} \ +-gpu ${NUM_GPU} \ +-memory ${MEMORY} \ +-parallelism ${NUM_NODE} \ -pscpu 1 \ -pservers 1 \ --psmemory 10Gi \ +-psmemory ${MEMORY} \ -passes 1 \ --entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEL_DIR}" \ +-entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEL_DIR} ${NUM_CPU} ${NUM_GPU} ${IS_LOCAL}" \ ${DS2_PATH} rm ${DS2_PATH}/pcloud_train.sh diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index b9a50360a..21bd43f92 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -1,28 +1,36 @@ DATA_PATH=$1 MODEL_PATH=$2 +NUM_CPU=$3 +NUM_GPU=$4 +IS_LOCAL=$5 + TRAIN_MANI=${DATA_PATH}/cloud.train.manifest -DEV_MANI=${DATA_PATH}/cloud.test.manifest +DEV_MANI=${DATA_PATH}/cloud.dev.manifest TRAIN_TAR=${DATA_PATH}/cloud.train.tar -DEV_TAR=${DATA_PATH}/cloud.test.tar +DEV_TAR=${DATA_PATH}/cloud.dev.tar VOCAB_PATH=${DATA_PATH}/vocab.txt MEAN_STD_FILE=${DATA_PATH}/mean_std.npz # split train data for each pcloud node python ./cloud/split_data.py \ ---in_manifest_path=$TRAIN_MANI \ ---data_tar_path=$TRAIN_TAR \ ---out_manifest_path='./local.train.manifest' +--in_manifest_path=${TRAIN_MANI} \ +--data_tar_path=${TRAIN_TAR} \ +--out_manifest_path='/local.train.manifest' # split dev data for each pcloud node python ./cloud/split_data.py \ ---in_manifest_path=$DEV_MANI \ ---data_tar_path=$DEV_TAR \ ---out_manifest_path='./local.test.manifest' +--in_manifest_path=${DEV_MANI} \ +--data_tar_path=${DEV_TAR} \ +--out_manifest_path='/local.dev.manifest' +# run train python train.py \ --use_gpu=1 \ ---mean_std_filepath=$MEAN_STD_FILE \ ---train_manifest_path='./local.train.manifest' \ ---dev_manifest_path='./local.test.manifest' \ ---vocab_filepath=$VOCAB_PATH \ +--trainer_count=${NUM_GPU} \ +--num_threads_data=${NUM_CPU} \ +--is_local=${IS_LOCAL} \ +--mean_std_filepath=${MEAN_STD_FILE} \ +--train_manifest_path='/local.train.manifest' \ +--dev_manifest_path='/local.dev.manifest' \ +--vocab_filepath=${VOCAB_PATH} \ --output_model_dir=${MODEL_PATH} diff --git a/cloud/split_data.py b/cloud/split_data.py index 6b0754a80..8df194a62 100644 --- a/cloud/split_data.py +++ b/cloud/split_data.py @@ -6,6 +6,7 @@ This script should be called in paddle cloud. from __future__ import absolute_import from __future__ import division from __future__ import print_function + import os import json import argparse diff --git a/cloud/upload_data.py b/cloud/upload_data.py index 3336f722b..efa9e77c0 100644 --- a/cloud/upload_data.py +++ b/cloud/upload_data.py @@ -1,30 +1,31 @@ -"""This tool is used for preparing data for DeepSpeech2 trainning on paddle cloud. +"""This script is used for preparing data for DeepSpeech2 trainning on paddle +cloud. Steps: 1. Read original manifest and get the local path of sound files. 2. Tar all local sound files into one tar file. 3. Modify original manifest to remove the local path information. 
-Finally, we will get a tar file and a manifest with sound file name, duration -and text. +Finally, we will get a tar file and a new manifest. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import json import os import tarfile import sys import argparse import shutil -sys.path.append('../') -from data_utils.utils import read_manifest from subprocess import call +import _init_paths +from data_utils.utils import read_manifest TRAIN_TAR = "cloud.train.tar" TRAIN_MANIFEST = "cloud.train.manifest" -TEST_TAR = "cloud.test.tar" -TEST_MANIFEST = "cloud.test.manifest" +DEV_TAR = "cloud.dev.tar" +DEV_MANIFEST = "cloud.dev.manifest" VOCAB_FILE = "vocab.txt" MEAN_STD_FILE = "mean_std.npz" @@ -33,41 +34,41 @@ parser.add_argument( "--train_manifest_path", default="../datasets/manifest.train", type=str, - help="Manifest file of train data. (default: %(default)s)") + help="Manifest file path for train data. (default: %(default)s)") parser.add_argument( - "--test_manifest_path", - default="../datasets/manifest.test", + "--dev_manifest_path", + default="../datasets/manifest.dev", type=str, - help="Manifest file of test data. (default: %(default)s)") + help="Manifest file path for validation data. (default: %(default)s)") parser.add_argument( "--vocab_file", default="../datasets/vocab/eng_vocab.txt", type=str, - help="Vocab file to be uploaded to paddlecloud. (default: %(default)s)") + help="Vocabulary file to be uploaded to paddlecloud. " + "(default: %(default)s)") parser.add_argument( "--mean_std_file", default="../mean_std.npz", type=str, - help="mean_std file to be uploaded to paddlecloud. (default: %(default)s)") + help="Normalizer's statistics (mean and stddev) file to be uploaded to " + "paddlecloud. (default: %(default)s)") parser.add_argument( "--cloud_data_path", required=True, type=str, - help="Destination path on paddlecloud. (default: %(default)s)") -args = parser.parse_args() - + help="Destination path on paddlecloud. (default: %(default)s)") parser.add_argument( "--local_tmp_path", default="./tmp/", type=str, - help="Local directory for storing temporary data. (default: %(default)s)") + help="Local directory for storing temporary data. (default: %(default)s)") args = parser.parse_args() def pack_data(manifest_path, out_tar_path, out_manifest_path): - '''1. According to the manifest, tar sound files into out_tar_path - 2. Generate a new manifest for output tar file - ''' + """1. According to the manifest, tar sound files into out_tar_path. + 2. Generate a new manifest for output tar file. + """ out_tar = tarfile.open(out_tar_path, 'w') manifest = read_manifest(manifest_path) results = [] @@ -83,11 +84,19 @@ def pack_data(manifest_path, out_tar_path, out_manifest_path): out_tar.close() +def pcloud_mkdir(dir): + """Make directory in PaddleCloud filesystem. + """ + if call(['paddlecloud', 'mkdir', dir]) != 0: + raise IOError("PaddleCloud mkdir failed: %s." % dir) + + def pcloud_cp(src, dst): - """Copy src from local filesytem to dst in PaddleCloud filesystem. + """Copy src from local filesytem to dst in PaddleCloud filesystem, + or downlowd src from PaddleCloud filesystem to dst in local filesystem. """ - ret = call(['paddlecloud', 'cp', src, dst]) - return ret + if call(['paddlecloud', 'cp', src, dst]) != 0: + raise IOError("PaddleCloud cp failed: from [%s] to [%s]." 
% (src, dst)) def pcloud_exist(path): @@ -100,48 +109,34 @@ def pcloud_exist(path): if __name__ == '__main__': cloud_train_manifest = os.path.join(args.cloud_data_path, TRAIN_MANIFEST) cloud_train_tar = os.path.join(args.cloud_data_path, TRAIN_TAR) - cloud_test_manifest = os.path.join(args.cloud_data_path, TEST_MANIFEST) - cloud_test_tar = os.path.join(args.cloud_data_path, TEST_TAR) + cloud_dev_manifest = os.path.join(args.cloud_data_path, DEV_MANIFEST) + cloud_dev_tar = os.path.join(args.cloud_data_path, DEV_TAR) cloud_vocab_file = os.path.join(args.cloud_data_path, VOCAB_FILE) cloud_mean_file = os.path.join(args.cloud_data_path, MEAN_STD_FILE) local_train_manifest = os.path.join(args.local_tmp_path, TRAIN_MANIFEST) local_train_tar = os.path.join(args.local_tmp_path, TRAIN_TAR) - local_test_manifest = os.path.join(args.local_tmp_path, TEST_MANIFEST) - local_test_tar = os.path.join(args.local_tmp_path, TEST_TAR) + local_dev_manifest = os.path.join(args.local_tmp_path, DEV_MANIFEST) + local_dev_tar = os.path.join(args.local_tmp_path, DEV_TAR) + # prepare local and cloud dir if os.path.exists(args.local_tmp_path): shutil.rmtree(args.local_tmp_path) os.makedirs(args.local_tmp_path) + pcloud_mkdir(args.cloud_data_path) + + # pack and upload train data + pack_data(args.train_manifest_path, local_train_tar, local_train_manifest) + pcloud_cp(local_train_manifest, cloud_train_manifest) + pcloud_cp(local_train_tar, cloud_train_tar) + + # pack and upload validation data + pack_data(args.dev_manifest_path, local_dev_tar, local_dev_manifest) + pcloud_cp(local_dev_manifest, cloud_dev_manifest) + pcloud_cp(local_dev_tar, cloud_dev_tar) - # train data - if args.train_manifest_path != "": - ret = pcloud_exist(cloud_train_manifest) - if ret != 0: - pack_data(args.train_manifest_path, local_train_tar, - local_train_manifest) - pcloud_cp(local_train_manifest, cloud_train_manifest) - pcloud_cp(local_train_tar, cloud_train_tar) - - # test data - if args.test_manifest_path != "": - ret = pcloud_exist(cloud_test_manifest) - if ret != 0: - pack_data(args.test_manifest_path, local_test_tar, - local_test_manifest) - pcloud_cp(local_test_manifest, cloud_test_manifest) - pcloud_cp(local_test_tar, cloud_test_tar) - - # vocab file - if args.vocab_file != "": - ret = pcloud_exist(cloud_vocab_file) - if ret != 0: - pcloud_cp(args.vocab_file, cloud_vocab_file) - - # mean_std file - if args.mean_std_file != "": - ret = pcloud_exist(cloud_mean_file) - if ret != 0: - pcloud_cp(args.mean_std_file, cloud_mean_file) + # upload vocab file and mean_std file + pcloud_cp(args.vocab_file, cloud_vocab_file) + pcloud_cp(args.mean_std_file, cloud_mean_file) shutil.rmtree(args.local_tmp_path) diff --git a/train.py b/train.py index 0d4e2508d..379e364c9 100644 --- a/train.py +++ b/train.py @@ -127,6 +127,12 @@ parser.add_argument( type=str, help="Augmentation configuration in json-format. " "(default: %(default)s)") +parser.add_argument( + "--is_local", + default=True, + type=distutils.util.strtobool, + help="Set to false if running with pserver in paddlecloud. 
" + "(default: %(default)s)") args = parser.parse_args() @@ -178,7 +184,10 @@ def train(): def main(): utils.print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) + paddle.init( + use_gpu=args.use_gpu, + trainer_count=args.trainer_count, + is_local=args.is_local) train() From c767f201b260116d32d8e694fc78b1b11b367efe Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 14 Aug 2017 19:54:26 +0800 Subject: [PATCH 136/335] Update default setting for cloud training for DS2. --- cloud/pcloud_submit.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 78172c1a5..2fb80d667 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,5 +1,5 @@ # Configure input data set in local filesystem -TRAIN_MANIFEST="../datasets/manifest.dev" +TRAIN_MANIFEST="../datasets/manifest.train" DEV_MANIFEST="../datasets/manifest.dev" VOCAB_FILE="../datasets/vocab/eng_vocab.txt" MEAN_STD_FILE="../mean_std.npz" @@ -8,10 +8,10 @@ CLOUD_DATA_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/data" CLOUD_MODEL_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/model" # Configure cloud resources NUM_CPU=12 -NUM_GPU=4 -NUM_NODE=2 +NUM_GPU=8 +NUM_NODE=1 MEMORY="10Gi" -IS_LOCAL="False" +IS_LOCAL="True" # Pack and upload local data to PaddleCloud filesystem python upload_data.py \ From 4b26bf620cc32c908964d7ec68b7ec6bec491206 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Mon, 14 Aug 2017 20:42:09 +0800 Subject: [PATCH 137/335] Rename self.local_data to self._local_data in class DataGenerator. --- data_utils/data.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 98180b4be..33fcadc7b 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -85,9 +85,9 @@ class DataGenerator(object): self._rng = random.Random(random_seed) self._epoch = 0 # for caching tar files info - self.local_data = local() - self.local_data.tar2info = {} - self.local_data.tar2object = {} + self._local_data = local() + self._local_data.tar2info = {} + self._local_data.tar2object = {} def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. @@ -240,16 +240,16 @@ class DataGenerator(object): """ if file.startswith('tar:'): tarpath, filename = file.split(':', 1)[1].split('#', 1) - if 'tar2info' not in self.local_data.__dict__: - self.local_data.tar2info = {} - if 'tar2object' not in self.local_data.__dict__: - self.local_data.tar2object = {} - if tarpath not in self.local_data.tar2info: + if 'tar2info' not in self._local_data.__dict__: + self._local_data.tar2info = {} + if 'tar2object' not in self._local_data.__dict__: + self._local_data.tar2object = {} + if tarpath not in self._local_data.tar2info: object, infoes = self._parse_tar(tarpath) - self.local_data.tar2info[tarpath] = infoes - self.local_data.tar2object[tarpath] = object - return self.local_data.tar2object[tarpath].extractfile( - self.local_data.tar2info[tarpath][filename]) + self._local_data.tar2info[tarpath] = infoes + self._local_data.tar2object[tarpath] = object + return self._local_data.tar2object[tarpath].extractfile( + self._local_data.tar2info[tarpath][filename]) else: return open(file, 'r') From 75719fea22677d46b44fce1aa0beb05dae377ccb Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 14 Aug 2017 20:21:09 +0800 Subject: [PATCH 138/335] Fix an incorrect usage of is_local argument. 
--- cloud/pcloud_submit.sh | 2 +- model.py | 6 +++++- train.py | 8 +++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 2fb80d667..3a64f32e2 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -7,7 +7,7 @@ MEAN_STD_FILE="../mean_std.npz" CLOUD_DATA_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/data" CLOUD_MODEL_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/model" # Configure cloud resources -NUM_CPU=12 +NUM_CPU=8 NUM_GPU=8 NUM_NODE=1 MEMORY="10Gi" diff --git a/model.py b/model.py index e2f2903b6..99412e595 100644 --- a/model.py +++ b/model.py @@ -46,6 +46,7 @@ class DeepSpeech2Model(object): gradient_clipping, num_passes, output_model_dir, + is_local=True, num_iterations_print=100): """Train the model. @@ -65,6 +66,8 @@ class DeepSpeech2Model(object): :param num_iterations_print: Number of training iterations for printing a training loss. :type rnn_iteratons_print: int + :param is_local: Set to False if running with pserver with multi-nodes. + :type is_local: bool :param output_model_dir: Directory for saving the model (every pass). :type output_model_dir: basestring """ @@ -79,7 +82,8 @@ class DeepSpeech2Model(object): trainer = paddle.trainer.SGD( cost=self._loss, parameters=self._parameters, - update_equation=optimizer) + update_equation=optimizer, + is_local=is_local) # create event handler def event_handler(event): diff --git a/train.py b/train.py index 379e364c9..262d8bf01 100644 --- a/train.py +++ b/train.py @@ -179,15 +179,13 @@ def train(): gradient_clipping=400, num_passes=args.num_passes, num_iterations_print=args.num_iterations_print, - output_model_dir=args.output_model_dir) + output_model_dir=args.output_model_dir, + is_local=args.is_local) def main(): utils.print_arguments(args) - paddle.init( - use_gpu=args.use_gpu, - trainer_count=args.trainer_count, - is_local=args.is_local) + paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) train() From f75746cd31d69aa14ec57327b5bff73c69a8a9dc Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 15 Aug 2017 16:53:26 +0800 Subject: [PATCH 139/335] Seperate data uploading from job summission for DS2 cloud training and add support for multiple shards uploading. 
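Patch 138 above moves the `is_local` switch from `paddle.init` to the trainer construction, since it is the SGD trainer that decides whether parameter updates stay in-process or go through pserver processes. A condensed sketch of that wiring, with a hypothetical `create_trainer` helper (the keyword arguments mirror those used in `model.py` in this series; this is an illustration, not the actual model code):

```
import paddle.v2 as paddle


def create_trainer(cost, parameters, learning_rate, is_local):
    # is_local=False makes the trainer synchronize parameters through
    # pservers on PaddleCloud; True keeps updates purely local.
    optimizer = paddle.optimizer.Adam(learning_rate=learning_rate)
    return paddle.trainer.SGD(
        cost=cost,
        parameters=parameters,
        update_equation=optimizer,
        is_local=is_local)
```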
--- cloud/pcloud_submit.sh | 35 ++------ cloud/pcloud_train.sh | 40 ++++----- cloud/pcloud_upload_data.sh | 17 ++++ cloud/split_data.py | 36 +++------ cloud/upload_data.py | 156 ++++++++++++++++-------------------- 5 files changed, 118 insertions(+), 166 deletions(-) create mode 100644 cloud/pcloud_upload_data.sh diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 3a64f32e2..35fe54f2c 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,32 +1,11 @@ -# Configure input data set in local filesystem -TRAIN_MANIFEST="../datasets/manifest.train" -DEV_MANIFEST="../datasets/manifest.dev" -VOCAB_FILE="../datasets/vocab/eng_vocab.txt" -MEAN_STD_FILE="../mean_std.npz" -# Configure output path in PaddleCloud filesystem -CLOUD_DATA_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/data" +TRAIN_MANIFEST="cloud/cloud.manifest.test" +DEV_MANIFEST="cloud/cloud.manifest.dev" CLOUD_MODEL_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/model" -# Configure cloud resources -NUM_CPU=8 +BATCH_SIZE=256 NUM_GPU=8 NUM_NODE=1 -MEMORY="10Gi" IS_LOCAL="True" -# Pack and upload local data to PaddleCloud filesystem -python upload_data.py \ ---train_manifest_path=${TRAIN_MANIFEST} \ ---dev_manifest_path=${DEV_MANIFEST} \ ---vocab_file=${VOCAB_FILE} \ ---mean_std_file=${MEAN_STD_FILE} \ ---cloud_data_path=${CLOUD_DATA_DIR} -if [ $? -ne 0 ] -then - echo "upload data failed!" - exit 1 -fi - -# Submit job to PaddleCloud JOB_NAME=deepspeech-`date +%Y%m%d%H%M%S` DS2_PATH=${PWD%/*} cp -f pcloud_train.sh ${DS2_PATH} @@ -34,15 +13,15 @@ cp -f pcloud_train.sh ${DS2_PATH} paddlecloud submit \ -image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest \ -jobname ${JOB_NAME} \ --cpu ${NUM_CPU} \ +-cpu ${NUM_GPU} \ -gpu ${NUM_GPU} \ --memory ${MEMORY} \ +-memory 10Gi \ -parallelism ${NUM_NODE} \ -pscpu 1 \ -pservers 1 \ --psmemory ${MEMORY} \ +-psmemory 10Gi \ -passes 1 \ --entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEL_DIR} ${NUM_CPU} ${NUM_GPU} ${IS_LOCAL}" \ +-entry "sh pcloud_train.sh ${TRAIN_MANIFEST} ${DEV_MANIFEST} ${CLOUD_MODEL_DIR} ${NUM_GPU} ${BATCH_SIZE} ${IS_LOCAL}" \ ${DS2_PATH} rm ${DS2_PATH}/pcloud_train.sh diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index 21bd43f92..e42da1d62 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -1,36 +1,24 @@ -DATA_PATH=$1 -MODEL_PATH=$2 -NUM_CPU=$3 +TRAIN_MANIFEST=$1 +DEV_MANIFEST=$2 +MODEL_PATH=$3 NUM_GPU=$4 -IS_LOCAL=$5 +BATCH_SIZE=$5 +IS_LOCAL=$6 -TRAIN_MANI=${DATA_PATH}/cloud.train.manifest -DEV_MANI=${DATA_PATH}/cloud.dev.manifest -TRAIN_TAR=${DATA_PATH}/cloud.train.tar -DEV_TAR=${DATA_PATH}/cloud.dev.tar -VOCAB_PATH=${DATA_PATH}/vocab.txt -MEAN_STD_FILE=${DATA_PATH}/mean_std.npz - -# split train data for each pcloud node python ./cloud/split_data.py \ ---in_manifest_path=${TRAIN_MANI} \ ---data_tar_path=${TRAIN_TAR} \ ---out_manifest_path='/local.train.manifest' +--in_manifest_path=${TRAIN_MANIFEST} \ +--out_manifest_path='/local.manifest.train' -# split dev data for each pcloud node python ./cloud/split_data.py \ ---in_manifest_path=${DEV_MANI} \ ---data_tar_path=${DEV_TAR} \ ---out_manifest_path='/local.dev.manifest' +--in_manifest_path=${DEV_MANIFEST} \ +--out_manifest_path='/local.manifest.dev' -# run train python train.py \ +--batch_size=$BATCH_SIZE \ --use_gpu=1 \ --trainer_count=${NUM_GPU} \ ---num_threads_data=${NUM_CPU} \ +--num_threads_data=${NUM_GPU} \ --is_local=${IS_LOCAL} \ ---mean_std_filepath=${MEAN_STD_FILE} \ ---train_manifest_path='/local.train.manifest' \ 
---dev_manifest_path='/local.dev.manifest' \ ---vocab_filepath=${VOCAB_PATH} \ ---output_model_dir=${MODEL_PATH} +--train_manifest_path='/local.manifest.train' \ +--dev_manifest_path='/local.manifest.dev' \ +--output_model_dir=${MODEL_PATH} \ diff --git a/cloud/pcloud_upload_data.sh b/cloud/pcloud_upload_data.sh new file mode 100644 index 000000000..1422b8a10 --- /dev/null +++ b/cloud/pcloud_upload_data.sh @@ -0,0 +1,17 @@ +IN_MANIFESTS="../datasets/manifest.tmp ../datasets/manifest.dev ../datasets/manifest.test" +OUT_MANIFESTS="./cloud.manifest.tmp ./cloud.manifest.dev ./cloud.manifest.test" +CLOUD_DATA_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/data" +NUM_SHARDS=10 + +python upload_data.py \ +--in_manifest_paths ${IN_MANIFESTS} \ +--out_manifest_paths ${OUT_MANIFESTS} \ +--cloud_data_dir ${CLOUD_DATA_DIR} \ +--num_shards ${NUM_SHARDS} + +if [ $? -ne 0 ] +then + echo "Upload Data Failed!" + exit 1 +fi +echo "All Done." diff --git a/cloud/split_data.py b/cloud/split_data.py index 8df194a62..3496d52bf 100644 --- a/cloud/split_data.py +++ b/cloud/split_data.py @@ -1,7 +1,5 @@ """This tool is used for splitting data into each node of -paddle cloud by total trainer count and current trainer id. -The meaning of trainer is a instance of k8s cluster. -This script should be called in paddle cloud. +paddlecloud. This script should be called in paddlecloud. """ from __future__ import absolute_import from __future__ import division @@ -14,40 +12,30 @@ import argparse parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--in_manifest_path", - default='./cloud.train.manifest', type=str, - help="Input manifest path. (default: %(default)s)") -parser.add_argument( - "--data_tar_path", - default='./cloud.train.tar', - type=str, - help="Data tar file path. (default: %(default)s)") + required=True, + help="Input manifest path for all nodes.") parser.add_argument( "--out_manifest_path", - default='./local.train.manifest', type=str, - help="Out manifest file path. (default: %(default)s)") + required=True, + help="Output manifest file path for current node.") args = parser.parse_args() -def split_data(in_manifest, tar_path, out_manifest): +def split_data(in_manifest_path, out_manifest_path): with open("/trainer_id", "r") as f: trainer_id = int(f.readline()[:-1]) with open("/trainer_count", "r") as f: trainer_count = int(f.readline()[:-1]) - tar_path = os.path.abspath(tar_path) - result = [] - for index, json_line in enumerate(open(in_manifest)): + out_manifest = [] + for index, json_line in enumerate(open(in_manifest_path, 'r')): if (index % trainer_count) == trainer_id: - json_data = json.loads(json_line) - json_data['audio_filepath'] = "tar:%s#%s" % ( - tar_path, json_data['audio_filepath']) - result.append("%s\n" % json.dumps(json_data)) - with open(out_manifest, 'w') as manifest: - manifest.writelines(result) + out_manifest.append("%s\n" % json_line.strip()) + with open(out_manifest_path, 'w') as f: + f.writelines(out_manifest) if __name__ == '__main__': - split_data(args.in_manifest_path, args.data_tar_path, - args.out_manifest_path) + split_data(args.in_manifest_path, args.out_manifest_path) diff --git a/cloud/upload_data.py b/cloud/upload_data.py index efa9e77c0..668575740 100644 --- a/cloud/upload_data.py +++ b/cloud/upload_data.py @@ -1,12 +1,9 @@ -"""This script is used for preparing data for DeepSpeech2 trainning on paddle -cloud. +"""This script is for uploading data for DeepSpeech2 training on paddlecloud. Steps: -1. 
Read original manifest and get the local path of sound files. -2. Tar all local sound files into one tar file. -3. Modify original manifest to remove the local path information. - -Finally, we will get a tar file and a new manifest. +1. Read original manifests and extract local sound files. +2. Tar all local sound files into multiple tar files and upload them. +3. Modify original manifests with updated paths in cloud filesystem. """ from __future__ import absolute_import from __future__ import division @@ -22,66 +19,81 @@ from subprocess import call import _init_paths from data_utils.utils import read_manifest -TRAIN_TAR = "cloud.train.tar" -TRAIN_MANIFEST = "cloud.train.manifest" -DEV_TAR = "cloud.dev.tar" -DEV_MANIFEST = "cloud.dev.manifest" -VOCAB_FILE = "vocab.txt" -MEAN_STD_FILE = "mean_std.npz" - parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--train_manifest_path", - default="../datasets/manifest.train", - type=str, - help="Manifest file path for train data. (default: %(default)s)") -parser.add_argument( - "--dev_manifest_path", - default="../datasets/manifest.dev", + "--in_manifest_paths", + default=["../datasets/manifest.test", "../datasets/manifest.dev"], type=str, - help="Manifest file path for validation data. (default: %(default)s)") -parser.add_argument( - "--vocab_file", - default="../datasets/vocab/eng_vocab.txt", - type=str, - help="Vocabulary file to be uploaded to paddlecloud. " + nargs='+', + help="Local filepaths of input manifests to load, pack and upload." "(default: %(default)s)") parser.add_argument( - "--mean_std_file", - default="../mean_std.npz", + "--out_manifest_paths", + default=["./cloud.manifest.test", "./cloud.manifest.dev"], type=str, - help="Normalizer's statistics (mean and stddev) file to be uploaded to " - "paddlecloud. (default: %(default)s)") + nargs='+', + help="Local filepaths of modified manifests to write to. " + "(default: %(default)s)") parser.add_argument( - "--cloud_data_path", + "--cloud_data_dir", required=True, type=str, - help="Destination path on paddlecloud. (default: %(default)s)") + help="Destination directory on paddlecloud to upload data to.") +parser.add_argument( + "--num_shards", + default=10, + type=int, + help="Number of parts to split data to. (default: %(default)s)") parser.add_argument( - "--local_tmp_path", + "--local_tmp_dir", default="./tmp/", type=str, help="Local directory for storing temporary data. (default: %(default)s)") args = parser.parse_args() -def pack_data(manifest_path, out_tar_path, out_manifest_path): - """1. According to the manifest, tar sound files into out_tar_path. - 2. Generate a new manifest for output tar file. +def upload_data(in_manifest_path_list, out_manifest_path_list, local_tmp_dir, + upload_tar_dir, num_shards): + """Extract and pack sound files listed in the manifest files into multple + tar files and upload them to padldecloud. Besides, generate new manifest + files with updated paths in paddlecloud. 
""" - out_tar = tarfile.open(out_tar_path, 'w') - manifest = read_manifest(manifest_path) - results = [] - for json_data in manifest: - sound_file = json_data['audio_filepath'] - filename = os.path.basename(sound_file) - out_tar.add(sound_file, arcname=filename) - json_data['audio_filepath'] = filename - results.append("%s\n" % json.dumps(json_data)) - with open(out_manifest_path, 'w') as out_manifest: - out_manifest.writelines(results) - out_manifest.close() - out_tar.close() + # compute total audio number + total_line = 0 + for manifest_path in in_manifest_path_list: + with open(manifest_path, 'r') as f: + total_line += len(f.readlines()) + line_per_tar = (total_line // num_shards) + 1 + + # pack and upload shard by shard + line_count, tar_file = 0, None + for manifest_path, out_manifest_path in zip(in_manifest_path_list, + out_manifest_path_list): + manifest = read_manifest(manifest_path) + out_manifest = [] + for json_data in manifest: + sound_filepath = json_data['audio_filepath'] + sound_filename = os.path.basename(sound_filepath) + if line_count % line_per_tar == 0: + if tar_file != None: + tar_file.close() + pcloud_cp(tar_path, upload_tar_dir) + os.remove(tar_path) + tar_name = 'part-%s-of-%s.tar' % ( + str(line_count // line_per_tar).zfill(5), + str(num_shards).zfill(5)) + tar_path = os.path.join(local_tmp_dir, tar_name) + tar_file = tarfile.open(tar_path, 'w') + tar_file.add(sound_filepath, arcname=sound_filename) + line_count += 1 + json_data['audio_filepath'] = "tar:%s#%s" % ( + os.path.join(upload_tar_dir, tar_name), sound_filename) + out_manifest.append("%s\n" % json.dumps(json_data)) + with open(out_manifest_path, 'w') as f: + f.writelines(out_manifest) + tar_file.close() + pcloud_cp(tar_path, upload_tar_dir) + os.remove(tar_path) def pcloud_mkdir(dir): @@ -99,44 +111,12 @@ def pcloud_cp(src, dst): raise IOError("PaddleCloud cp failed: from [%s] to [%s]." % (src, dst)) -def pcloud_exist(path): - """Check if file or directory exists in PaddleCloud filesystem. 
- """ - ret = call(['paddlecloud', 'ls', path]) - return ret - - if __name__ == '__main__': - cloud_train_manifest = os.path.join(args.cloud_data_path, TRAIN_MANIFEST) - cloud_train_tar = os.path.join(args.cloud_data_path, TRAIN_TAR) - cloud_dev_manifest = os.path.join(args.cloud_data_path, DEV_MANIFEST) - cloud_dev_tar = os.path.join(args.cloud_data_path, DEV_TAR) - cloud_vocab_file = os.path.join(args.cloud_data_path, VOCAB_FILE) - cloud_mean_file = os.path.join(args.cloud_data_path, MEAN_STD_FILE) - - local_train_manifest = os.path.join(args.local_tmp_path, TRAIN_MANIFEST) - local_train_tar = os.path.join(args.local_tmp_path, TRAIN_TAR) - local_dev_manifest = os.path.join(args.local_tmp_path, DEV_MANIFEST) - local_dev_tar = os.path.join(args.local_tmp_path, DEV_TAR) - - # prepare local and cloud dir - if os.path.exists(args.local_tmp_path): - shutil.rmtree(args.local_tmp_path) - os.makedirs(args.local_tmp_path) - pcloud_mkdir(args.cloud_data_path) - - # pack and upload train data - pack_data(args.train_manifest_path, local_train_tar, local_train_manifest) - pcloud_cp(local_train_manifest, cloud_train_manifest) - pcloud_cp(local_train_tar, cloud_train_tar) - - # pack and upload validation data - pack_data(args.dev_manifest_path, local_dev_tar, local_dev_manifest) - pcloud_cp(local_dev_manifest, cloud_dev_manifest) - pcloud_cp(local_dev_tar, cloud_dev_tar) + if not os.path.exists(args.local_tmp_dir): + os.makedirs(args.local_tmp_dir) + pcloud_mkdir(args.cloud_data_dir) - # upload vocab file and mean_std file - pcloud_cp(args.vocab_file, cloud_vocab_file) - pcloud_cp(args.mean_std_file, cloud_mean_file) + upload_data(args.in_manifest_paths, args.out_manifest_paths, + args.local_tmp_dir, args.cloud_data_dir, 10) - shutil.rmtree(args.local_tmp_path) + shutil.rmtree(args.local_tmp_dir) From da28015556fb58594ba78a7e6725b7da1f1d0356 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 15 Aug 2017 18:11:23 +0800 Subject: [PATCH 140/335] Update README for DS2 cloud training. --- README.md | 8 +++- cloud/README.md | 97 ++++++++++++++++++++++--------------------------- 2 files changed, 50 insertions(+), 55 deletions(-) mode change 100644 => 100755 README.md mode change 100644 => 100755 cloud/README.md diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 9c2a0872b..263339415 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Deep Speech 2 on PaddlePaddle +# DeepSpeech2 on PaddlePaddle ## Installation @@ -161,3 +161,9 @@ python demo_client.py On the client console, press and hold the "white-space" key on the keyboard to start talking, until you finish your speech and then release the "white-space" key. The decoding results (infered transcription) will be displayed. It could be possible to start the server and the client in two seperate machines, e.g. `demo_client.py` is usually started in a machine with a microphone hardware, while `demo_server.py` is usually started in a remote server with powerful GPUs. Please first make sure that these two machines have network access to each other, and then use `--host_ip` and `--host_port` to indicate the server machine's actual IP address (instead of the `localhost` as default) and TCP port, in both `demo_server.py` and `demo_client.py`. + + +## PaddleCloud Training + +If you wish to train DeepSpeech2 on PaddleCloud, please refer to +[Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). 
diff --git a/cloud/README.md b/cloud/README.md old mode 100644 new mode 100755 index 8e7e49f9e..274fe3741 --- a/cloud/README.md +++ b/cloud/README.md @@ -1,13 +1,15 @@ -# Run DS2 on PaddleCloud +# Train DeepSpeech2 on PaddleCloud >Note: ->Make sure [PaddleCloud client](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud) has be installed and current directory is `models/deep_speech_2/cloud/` +>Please make sure [PaddleCloud Client](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud) has be installed and current directory is `deep_speech_2/cloud/` -## Step-1 Configure data set +## Step 1: Upload Data -Configure your input data and output path in pcloud_submit.sh: +Provided with several input manifests, `pcloud_upload_data.sh` will pack and upload all the containing audio files to PaddleCloud filesystem, and also generate some corresponding manifest files with updated cloud paths. -- `TRAIN_MANIFEST`: Absolute path of train data manifest file in local file system.This file has format as bellow: +Please modify the following arguments in `pcloud_upload_data.sh`: + +- `IN_MANIFESTS`: Paths (in local filesystem) of manifest files containing the audio files to be uploaded. Multiple paths can be concatenated with a whitespace delimeter. Lines of manifest files are in the following format: ``` {"audio_filepath": "/home/disk1/LibriSpeech/dev-clean/1272/128104/1272-128104-0000.flac", "duration": 5.855, "text @@ -15,67 +17,54 @@ Configure your input data and output path in pcloud_submit.sh: {"audio_filepath": "/home/disk1/LibriSpeech/dev-clean/1272/128104/1272-128104-0001.flac", "duration": 4.815, "text ": "nor is mister ..."} ``` +- `OUT_MANIFESTS`: Paths (in local filesystem) to write the updated output manifest files to. Multiple paths can be concatenated with a whitespace delimeter. The values of `audio_filepath` in the output manifests are jjjjjkknew paths in PaddleCloud filesystem. +- `CLOUD_DATA_DIR`: Directory (in PaddleCloud filesystem) to upload the data to. +- `NUM_SHARDS`: Number of data shards / parts (in tar files) to be generated when packing and uploading data. Smaller `num_shards` requires larger temoporal local disk space for packing data. -- `TEST_MANIFEST`: Absolute path of train data manifest file in local filesystem. This file has format like `TRAIN_MANIFEST`. -- `VOCAB_FILE`: Absolute path of vocabulary file in local filesytem. -- `MEAN_STD_FILE`: Absolute path of normalizer's statistic file in local filesytem. -- `CLOUD_DATA_DIR:` Absolute path in PaddleCloud filesystem. We will upload local train data to this directory. -- `CLOUD_MODEL_DIR`: Absolute path in PaddleCloud filesystem. PaddleCloud trainer will save model to this directory. +By running: ->Note: Upload will be skipped if target file has existed in `CLOUD_DATA_DIR`. +``` +sh pcloud_upload_data.sh +``` +all the audio files will be uploaded to PaddleCloud filesystem, and you will get modified manifests files in `OUT_MANIFESTS`. -## Step-2 Configure computation resource +You have to take this step only once, when it is your first time to do the cloud training. Later on, the data is persisitent on the cloud filesystem and is reusable for multple jobs. 
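Since upload failures on malformed manifests are expensive to discover late, it can help to sanity-check the local manifests against the format shown above before running the upload. A small sketch with a hypothetical `check_manifest` helper (not part of the repository):

```
import json
import os


def check_manifest(manifest_path):
    # Fail fast on malformed JSON, missing keys or missing audio files,
    # before any packing / uploading work starts.
    with open(manifest_path) as f:
        for line_no, line in enumerate(f, 1):
            entry = json.loads(line)
            for key in ('audio_filepath', 'duration', 'text'):
                if key not in entry:
                    raise ValueError("line %d: missing key '%s'" %
                                     (line_no, key))
            if not os.path.isfile(entry['audio_filepath']):
                raise IOError("line %d: audio file not found: %s" %
                              (line_no, entry['audio_filepath']))
```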
-Configure computation resource in pcloud_submit.sh: +## Step 2: Configure Training -``` -# Configure computation resource and submit job to PaddleCloud - paddlecloud submit \ - -image wanghaoshuang/pcloud_ds2:latest \ - -jobname ${JOB_NAME} \ - -cpu 4 \ - -gpu 4 \ - -memory 10Gi \ - -parallelism 1 \ - -pscpu 1 \ - -pservers 1 \ - -psmemory 10Gi \ - -passes 1 \ - -entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEL_DIR}" \ - ${DS2_PATH} -``` -For more information, please refer to [PaddleCloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务) +Configure cloud training arguments in `pcloud_submit.sh`, with the following arguments: -## Step-3 Configure algorithm options -Configure algorithm options in pcloud_train.sh: -``` -python train.py \ ---use_gpu=1 \ ---trainer_count=4 \ ---batch_size=256 \ ---mean_std_filepath=$MEAN_STD_FILE \ ---train_manifest_path='./local.train.manifest' \ ---dev_manifest_path='./local.test.manifest' \ ---vocab_filepath=$VOCAB_PATH \ ---output_model_dir=${MODEL_PATH} -``` -You can get more information about algorithm options by follow command: -``` -cd .. -python train.py --help -``` +- `TRAIN_MANIFEST`: Manifest filepath (in local filesystem) for training. Notice that the`audio_filepath` should be in cloud filesystem, like those generated by `pcloud_upload_data.sh`. +- `DEV_MANIFEST`: Manifest filepath (in local filesystem) for validation. +- `CLOUD_MODEL_DIR`: Directory (in PaddleCloud filesystem) to save the model parameters (checkpoints). +- `BATCH_SIZE`: Training batch size for a single node. +- `NUM_GPU`: Number of GPUs allocated for a single node. +- `NUM_NODE`: Number of nodes (machines) allocated for this job. +- `IS_LOCAL`: Set to False to enable parameter server, if using multiple nodes. + +Configure other training hyper-parameters in `pcloud_train.sh` as you wish, just as what you can do in local training. + +By running: -## Step-4 Submit job ``` -$ sh pcloud_submit.sh +sh pcloud_submit.sh ``` +you submit a training job to PaddleCloud. And you will see the job name when the submission is done. + +## Step 3 Get Job Logs + +Run this to list all the jobs you have submitted, as well as their running status: -## Step-5 Get logs ``` -$ paddlecloud logs -n 10000 deepspeech20170727130129 +paddlecloud get jobs ``` -For more information, please refer to [PaddleCloud client](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#下载并配置paddlecloud) or get help by follow command: + +Run this, the corresponding job's logs will be printed. ``` -paddlecloud --help +paddlecloud logs -n 10000 $REPLACED_WITH_YOUR_ACTUAL_JOB_NAME ``` + +## More Help + +For more information about the usage of PaddleCloud, please refer to [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务). From d5a357d173ad56869993d9f8dfef3380b799cf23 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 15 Aug 2017 21:52:44 +0800 Subject: [PATCH 141/335] Update DS2 cloud training according to review comments. 
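The Step 3 commands in the README above (`paddlecloud get jobs` and `paddlecloud logs -n 10000 <job name>`) can also be driven from a small wrapper when job status needs to be checked programmatically. A sketch with a hypothetical `show_job_logs` helper, using only the commands shown in the README:

```
from subprocess import call


def show_job_logs(job_name, num_lines=10000):
    # List all submitted jobs, then print the last `num_lines` log lines of
    # the given job (the name echoed by pcloud_submit.sh at submission time).
    if call(['paddlecloud', 'get', 'jobs']) != 0:
        raise IOError("Listing PaddleCloud jobs failed.")
    if call(['paddlecloud', 'logs', '-n', str(num_lines), job_name]) != 0:
        raise IOError("Fetching logs failed for job: %s." % job_name)
```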
--- cloud/README.md | 17 +++++------------ cloud/pcloud_submit.sh | 8 ++++---- cloud/pcloud_upload_data.sh | 8 ++++---- cloud/upload_data.py | 13 ++++++++++--- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/cloud/README.md b/cloud/README.md index 274fe3741..a5be1c420 100755 --- a/cloud/README.md +++ b/cloud/README.md @@ -9,16 +9,9 @@ Provided with several input manifests, `pcloud_upload_data.sh` will pack and upl Please modify the following arguments in `pcloud_upload_data.sh`: -- `IN_MANIFESTS`: Paths (in local filesystem) of manifest files containing the audio files to be uploaded. Multiple paths can be concatenated with a whitespace delimeter. Lines of manifest files are in the following format: - -``` -{"audio_filepath": "/home/disk1/LibriSpeech/dev-clean/1272/128104/1272-128104-0000.flac", "duration": 5.855, "text -": "mister quilter is the ..."} -{"audio_filepath": "/home/disk1/LibriSpeech/dev-clean/1272/128104/1272-128104-0001.flac", "duration": 4.815, "text -": "nor is mister ..."} -``` -- `OUT_MANIFESTS`: Paths (in local filesystem) to write the updated output manifest files to. Multiple paths can be concatenated with a whitespace delimeter. The values of `audio_filepath` in the output manifests are jjjjjkknew paths in PaddleCloud filesystem. -- `CLOUD_DATA_DIR`: Directory (in PaddleCloud filesystem) to upload the data to. +- `IN_MANIFESTS`: Paths (in local filesystem) of manifest files containing the audio files to be uploaded. Multiple paths can be concatenated with a whitespace delimeter. +- `OUT_MANIFESTS`: Paths (in local filesystem) to write the updated output manifest files to. Multiple paths can be concatenated with a whitespace delimeter. The values of `audio_filepath` in the output manifests are updated with cloud filesystem paths. +- `CLOUD_DATA_DIR`: Directory (in PaddleCloud filesystem) to upload the data to. Don't forget to replace `USERNAME` in the default directory and make sure that you have the permission to write it. - `NUM_SHARDS`: Number of data shards / parts (in tar files) to be generated when packing and uploading data. Smaller `num_shards` requires larger temoporal local disk space for packing data. By running: @@ -28,7 +21,7 @@ sh pcloud_upload_data.sh ``` all the audio files will be uploaded to PaddleCloud filesystem, and you will get modified manifests files in `OUT_MANIFESTS`. -You have to take this step only once, when it is your first time to do the cloud training. Later on, the data is persisitent on the cloud filesystem and is reusable for multple jobs. +You have to take this step only once, in the very first time you do the cloud training. Later on, the data is persisitent on the cloud filesystem and reusable for further job submissions. ## Step 2: Configure Training @@ -36,7 +29,7 @@ Configure cloud training arguments in `pcloud_submit.sh`, with the following arg - `TRAIN_MANIFEST`: Manifest filepath (in local filesystem) for training. Notice that the`audio_filepath` should be in cloud filesystem, like those generated by `pcloud_upload_data.sh`. - `DEV_MANIFEST`: Manifest filepath (in local filesystem) for validation. -- `CLOUD_MODEL_DIR`: Directory (in PaddleCloud filesystem) to save the model parameters (checkpoints). +- `CLOUD_MODEL_DIR`: Directory (in PaddleCloud filesystem) to save the model parameters (checkpoints). Don't forget to replace `USERNAME` in the default directory and make sure that you have the permission to write it. - `BATCH_SIZE`: Training batch size for a single node. 
- `NUM_GPU`: Number of GPUs allocated for a single node. - `NUM_NODE`: Number of nodes (machines) allocated for this job. diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 35fe54f2c..a7fb42cbc 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,6 +1,6 @@ -TRAIN_MANIFEST="cloud/cloud.manifest.test" +TRAIN_MANIFEST="cloud/cloud.manifest.train" DEV_MANIFEST="cloud/cloud.manifest.dev" -CLOUD_MODEL_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/model" +CLOUD_MODEL_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/model" BATCH_SIZE=256 NUM_GPU=8 NUM_NODE=1 @@ -15,11 +15,11 @@ paddlecloud submit \ -jobname ${JOB_NAME} \ -cpu ${NUM_GPU} \ -gpu ${NUM_GPU} \ --memory 10Gi \ +-memory 64Gi \ -parallelism ${NUM_NODE} \ -pscpu 1 \ -pservers 1 \ --psmemory 10Gi \ +-psmemory 64Gi \ -passes 1 \ -entry "sh pcloud_train.sh ${TRAIN_MANIFEST} ${DEV_MANIFEST} ${CLOUD_MODEL_DIR} ${NUM_GPU} ${BATCH_SIZE} ${IS_LOCAL}" \ ${DS2_PATH} diff --git a/cloud/pcloud_upload_data.sh b/cloud/pcloud_upload_data.sh index 1422b8a10..97a0ab181 100644 --- a/cloud/pcloud_upload_data.sh +++ b/cloud/pcloud_upload_data.sh @@ -1,7 +1,7 @@ -IN_MANIFESTS="../datasets/manifest.tmp ../datasets/manifest.dev ../datasets/manifest.test" -OUT_MANIFESTS="./cloud.manifest.tmp ./cloud.manifest.dev ./cloud.manifest.test" -CLOUD_DATA_DIR="/pfs/dlnel/home/sunxinghai@baidu.com/deepspeech2/data" -NUM_SHARDS=10 +IN_MANIFESTS="../datasets/manifest.train ../datasets/manifest.dev ../datasets/manifest.test" +OUT_MANIFESTS="./cloud.manifest.train ./cloud.manifest.dev ./cloud.manifest.test" +CLOUD_DATA_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/data/librispeech" +NUM_SHARDS=50 python upload_data.py \ --in_manifest_paths ${IN_MANIFESTS} \ diff --git a/cloud/upload_data.py b/cloud/upload_data.py index 668575740..9973f8c76 100644 --- a/cloud/upload_data.py +++ b/cloud/upload_data.py @@ -22,14 +22,20 @@ from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--in_manifest_paths", - default=["../datasets/manifest.test", "../datasets/manifest.dev"], + default=[ + "../datasets/manifest.train", "../datasets/manifest.dev", + "../datasets/manifest.test" + ], type=str, nargs='+', help="Local filepaths of input manifests to load, pack and upload." "(default: %(default)s)") parser.add_argument( "--out_manifest_paths", - default=["./cloud.manifest.test", "./cloud.manifest.dev"], + default=[ + "./cloud.manifest.train", "./cloud.manifest.dev", + "./cloud.manifest.test" + ], type=str, nargs='+', help="Local filepaths of modified manifests to write to. 
" @@ -91,6 +97,7 @@ def upload_data(in_manifest_path_list, out_manifest_path_list, local_tmp_dir, out_manifest.append("%s\n" % json.dumps(json_data)) with open(out_manifest_path, 'w') as f: f.writelines(out_manifest) + pcloud_cp(out_manifest_path, upload_tar_dir) tar_file.close() pcloud_cp(tar_path, upload_tar_dir) os.remove(tar_path) @@ -117,6 +124,6 @@ if __name__ == '__main__': pcloud_mkdir(args.cloud_data_dir) upload_data(args.in_manifest_paths, args.out_manifest_paths, - args.local_tmp_dir, args.cloud_data_dir, 10) + args.local_tmp_dir, args.cloud_data_dir, args.num_shards) shutil.rmtree(args.local_tmp_dir) From 5e13fd7dadddf18c5af36c0a4d94683a746edab6 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Wed, 16 Aug 2017 17:27:28 +0800 Subject: [PATCH 142/335] deep speech2 can directly use warpctc instead by export LD_LIBRARY_PATH --- .gitignore | 3 +++ README.md | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..db0537f3b --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +manifest* +mean_std.npz +thirdparty/ diff --git a/README.md b/README.md index 263339415..4e8befa5b 100755 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ ``` sh setup.sh -export LD_LIBRARY_PATH=$PADDLE_INSTALL_DIR/Paddle/third_party/install/warpctc/lib:$LD_LIBRARY_PATH ``` Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. From 638fae13f4925b993b43af4ba748a480e27e5e52 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 17 Aug 2017 10:16:00 +0800 Subject: [PATCH 143/335] use wget to download --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 7f4272550..6c8a70994 100644 --- a/setup.sh +++ b/setup.sh @@ -13,7 +13,7 @@ fi python -c "import soundfile" if [ $? != 0 ]; then echo "Install package libsndfile into default system path." - curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" + wget "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" if [ $? != 0 ]; then echo "Download libsndfile-1.0.28.tar.gz failed !!!" exit 1 From be37b03f0c4c181f3921697bfaf5a17a50f11b51 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 17 Aug 2017 11:10:49 +0800 Subject: [PATCH 144/335] Fix a typo caused exception for audio_featurizer.py. --- data_utils/featurizer/audio_featurizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index f0d223cfb..39f453017 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -57,7 +57,7 @@ class AudioFeaturizer(object): def featurize(self, audio_segment, allow_downsampling=True, - allow_upsamplling=True): + allow_upsampling=True): """Extract audio features from AudioSegment or SpeechSegment. :param audio_segment: Audio/speech segment to extract features from. From 5a632758450911eff0b0421aa111be5141a4a71a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 21 Aug 2017 21:54:28 +0800 Subject: [PATCH 145/335] Add GRU support. 
--- demo_server.py | 6 +++++ evaluate.py | 6 +++++ infer.py | 6 +++++ layer.py | 64 ++++++++++++++++++++++++++++++++++++++++++++------ model.py | 9 +++---- train.py | 8 ++++++- tune.py | 6 +++++ 7 files changed, 93 insertions(+), 12 deletions(-) diff --git a/demo_server.py b/demo_server.py index c7e7e94a4..60d972393 100644 --- a/demo_server.py +++ b/demo_server.py @@ -66,6 +66,11 @@ parser.add_argument( default=512, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gru", + default=True, + type=bool, + help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", default=True, @@ -199,6 +204,7 @@ def start_server(): num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, + use_gru=args.use_gru, pretrained_model_path=args.model_filepath) # prepare ASR inference handler diff --git a/evaluate.py b/evaluate.py index 82dcec3c2..2f87abbde 100644 --- a/evaluate.py +++ b/evaluate.py @@ -38,6 +38,11 @@ parser.add_argument( default=512, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gru", + default=True, + type=bool, + help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", default=True, @@ -142,6 +147,7 @@ def evaluate(): num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, + use_gru=args.use_gru, pretrained_model_path=args.model_filepath) error_rate_func = cer if args.error_rate_type == 'cer' else wer diff --git a/infer.py b/infer.py index 43643cde7..91b08932c 100644 --- a/infer.py +++ b/infer.py @@ -33,6 +33,11 @@ parser.add_argument( default=512, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gru", + default=True, + type=bool, + help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", default=True, @@ -143,6 +148,7 @@ def infer(): num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, + use_gru=args.use_gru, pretrained_model_path=args.model_filepath) result_transcripts = ds2_model.infer_batch( infer_data=infer_data, diff --git a/layer.py b/layer.py index 3b492645d..1b1a58104 100644 --- a/layer.py +++ b/layer.py @@ -57,7 +57,7 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): # input-hidden weights shared across bi-direcitonal rnn. input_proj = paddle.layer.fc( input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) - # batch norm is only performed on input-state projection + # batch norm is only performed on input-state projection input_proj_bn = paddle.layer.batch_norm( input=input_proj, act=paddle.activation.Linear()) # forward and backward in time @@ -68,6 +68,38 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) +def bidirectional_gru_bn_layer(name, input, size, act): + """Bidirectonal gru layer with sequence-wise batch normalization. + The batch normalization is only performed on input-state weights. + + :param name: Name of the layer. + :type name: string + :param input: Input layer. + :type input: LayerOutput + :param size: Number of RNN cells. + :type size: int + :param act: Activation type. + :type act: BaseActivation + :return: Bidirectional simple rnn layer. + :rtype: LayerOutput + """ + # input-hidden weights shared across bi-direcitonal rnn. 
+ input_proj = paddle.layer.fc( + input=input, + size=size * 3, + act=paddle.activation.Linear(), + bias_attr=False) + # batch norm is only performed on input-state projection + input_proj_bn = paddle.layer.batch_norm( + input=input_proj, act=paddle.activation.Linear()) + # forward and backward in time + forward_gru = paddle.layer.grumemory( + input=input_proj_bn, act=act, reverse=False) + backward_gru = paddle.layer.grumemory( + input=input_proj_bn, act=act, reverse=True) + return paddle.layer.concat(input=[forward_gru, backward_gru]) + + def conv_group(input, num_stacks): """Convolution group with stacked convolution layers. @@ -83,7 +115,7 @@ def conv_group(input, num_stacks): filter_size=(11, 41), num_channels_in=1, num_channels_out=32, - stride=(3, 2), + stride=(2, 2), padding=(5, 20), act=paddle.activation.BRelu()) for i in xrange(num_stacks - 1): @@ -100,7 +132,7 @@ def conv_group(input, num_stacks): return conv, output_num_channels, output_height -def rnn_group(input, size, num_stacks): +def rnn_group(input, size, num_stacks, use_gru): """RNN group with stacked bidirectional simple RNN layers. :param input: Input layer. @@ -109,13 +141,25 @@ def rnn_group(input, size, num_stacks): :type size: int :param num_stacks: Number of stacked rnn layers. :type num_stacks: int + :param use_gru: Use gru if set True. Use simple rnn if set False. + :type use_gru: bool :return: Output layer of the RNN group. :rtype: LayerOutput """ output = input for i in xrange(num_stacks): - output = bidirectional_simple_rnn_bn_layer( - name=str(i), input=output, size=size, act=paddle.activation.BRelu()) + if use_gru: + output = bidirectional_gru_bn_layer( + name=str(i), + input=output, + size=size, + act=paddle.activation.BRelu()) + else: + output = bidirectional_simple_rnn_bn_layer( + name=str(i), + input=output, + size=size, + act=paddle.activation.BRelu()) return output @@ -124,7 +168,8 @@ def deep_speech2(audio_data, dict_size, num_conv_layers=2, num_rnn_layers=3, - rnn_size=256): + rnn_size=256, + use_gru=True): """ The whole DeepSpeech2 model structure (a simplified version). @@ -140,6 +185,8 @@ def deep_speech2(audio_data, :type num_rnn_layers: int :param rnn_size: RNN layer size (number of RNN cells). :type rnn_size: int + :param use_gru: Use gru if set True. Use simple rnn if set False. + :type use_gru: bool :return: A tuple of an output unnormalized log probability layer ( before softmax) and a ctc cost layer. 
:rtype: tuple of LayerOutput @@ -157,7 +204,10 @@ def deep_speech2(audio_data, block_y=conv_group_height) # rnn group rnn_group_output = rnn_group( - input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers) + input=conv2seq, + size=rnn_size, + num_stacks=num_rnn_layers, + use_gru=use_gru) fc = paddle.layer.fc( input=rnn_group_output, size=dict_size + 1, diff --git a/model.py b/model.py index 99412e595..eec971c00 100644 --- a/model.py +++ b/model.py @@ -30,9 +30,9 @@ class DeepSpeech2Model(object): """ def __init__(self, vocab_size, num_conv_layers, num_rnn_layers, - rnn_layer_size, pretrained_model_path): + rnn_layer_size, use_gru, pretrained_model_path): self._create_network(vocab_size, num_conv_layers, num_rnn_layers, - rnn_layer_size) + rnn_layer_size, use_gru) self._create_parameters(pretrained_model_path) self._inferer = None self._loss_inferer = None @@ -226,7 +226,7 @@ class DeepSpeech2Model(object): gzip.open(model_path)) def _create_network(self, vocab_size, num_conv_layers, num_rnn_layers, - rnn_layer_size): + rnn_layer_size, use_gru): """Create data layers and model network.""" # paddle.data_type.dense_array is used for variable batch input. # The size 161 * 161 is only an placeholder value and the real shape @@ -243,4 +243,5 @@ class DeepSpeech2Model(object): dict_size=vocab_size, num_conv_layers=num_conv_layers, num_rnn_layers=num_rnn_layers, - rnn_size=rnn_layer_size) + rnn_size=rnn_layer_size, + use_gru=use_gru) diff --git a/train.py b/train.py index 262d8bf01..8e95d7bc8 100644 --- a/train.py +++ b/train.py @@ -37,9 +37,14 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=512, + default=1280, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gru", + default=True, + type=bool, + help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--adam_learning_rate", default=5e-4, @@ -170,6 +175,7 @@ def train(): num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, + use_gru=args.use_gru, pretrained_model_path=args.init_model_path) ds2_model.train( train_batch_reader=train_batch_reader, diff --git a/tune.py b/tune.py index 328d67a11..8a9b5b610 100644 --- a/tune.py +++ b/tune.py @@ -34,6 +34,11 @@ parser.add_argument( default=512, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--use_gru", + default=True, + type=bool, + help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", default=True, @@ -158,6 +163,7 @@ def tune(): num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, + use_gru=args.use_gru, pretrained_model_path=args.model_filepath) # create grid for search From d7a2c0e9908e6cc2ceba41aaed43931464091373 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 21 Aug 2017 22:00:01 +0800 Subject: [PATCH 146/335] Replace activator BRelu with Relu. 
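In the GRU branch added by the previous patch, the shared input projection in `bidirectional_gru_bn_layer` is sized `size * 3` because a GRU needs one input transform each for the update gate, the reset gate and the candidate state, whereas the simple RNN branch projects to `size` only. A tiny sketch of that sizing rule (the helper name is made up for illustration):

```
def rnn_input_projection_size(rnn_size, use_gru):
    # Matches the fc sizes in layer.py above: 3 * size for the GRU's
    # update / reset / candidate transforms, size for the simple RNN.
    return rnn_size * 3 if use_gru else rnn_size


assert rnn_input_projection_size(1280, use_gru=True) == 3840
assert rnn_input_projection_size(1280, use_gru=False) == 1280
```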
--- layer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/layer.py b/layer.py index 1b1a58104..c4055aaa3 100644 --- a/layer.py +++ b/layer.py @@ -117,7 +117,7 @@ def conv_group(input, num_stacks): num_channels_out=32, stride=(2, 2), padding=(5, 20), - act=paddle.activation.BRelu()) + act=paddle.activation.Relu()) for i in xrange(num_stacks - 1): conv = conv_bn_layer( input=conv, @@ -126,7 +126,7 @@ def conv_group(input, num_stacks): num_channels_out=32, stride=(1, 2), padding=(5, 10), - act=paddle.activation.BRelu()) + act=paddle.activation.Relu()) output_num_channels = 32 output_height = 160 // pow(2, num_stacks) + 1 return conv, output_num_channels, output_height @@ -153,13 +153,13 @@ def rnn_group(input, size, num_stacks, use_gru): name=str(i), input=output, size=size, - act=paddle.activation.BRelu()) + act=paddle.activation.Relu()) else: output = bidirectional_simple_rnn_bn_layer( name=str(i), input=output, size=size, - act=paddle.activation.BRelu()) + act=paddle.activation.Relu()) return output From 1b707054a97237a3c0b7ad311e9dc20dd3686686 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 22 Aug 2017 16:19:57 +0800 Subject: [PATCH 147/335] reorganize cpp files --- deploy.py | 6 +++--- deploy/ctc_decoders.cpp | 4 +++- deploy/ctc_decoders.h | 2 +- deploy/ctc_decoders.i | 1 + deploy/decoder_setup.py | 6 ++++-- deploy/decoder_utils.cpp | 5 +++++ deploy/decoder_utils.h | 15 +++++++++++++++ deploy/scorer.cpp | 12 ++++++------ deploy/scorer.h | 10 +++++----- deploy/swig_decoders.py | 28 ++++++++++++++++++++++++++-- 10 files changed, 69 insertions(+), 20 deletions(-) create mode 100644 deploy/decoder_utils.cpp create mode 100644 deploy/decoder_utils.h diff --git a/deploy.py b/deploy.py index 70a9b9efe..091d82892 100644 --- a/deploy.py +++ b/deploy.py @@ -11,7 +11,7 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from deploy.swig_decoders import * -from swig_scorer import LmScorer +from swig_scorer import Scorer from error_rate import wer import utils import time @@ -19,7 +19,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=100, + default=10, type=int, help="Number of samples for inference. 
(default: %(default)s)") parser.add_argument( @@ -164,7 +164,7 @@ def infer(): ] # external scorer - ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) ## decode and print time_begin = time.time() diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 4cff6d5e5..75555c018 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -5,9 +5,11 @@ #include #include #include "ctc_decoders.h" +#include "decoder_utils.h" typedef double log_prob_type; + template bool pair_comp_first_rev(const std::pair a, const std::pair b) { @@ -81,7 +83,7 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob, - LmScorer *ext_scorer, + Scorer *ext_scorer, bool nproc) { // dimension check int num_time_steps = probs_seq.size(); diff --git a/deploy/ctc_decoders.h b/deploy/ctc_decoders.h index da08a2c58..50a6014f0 100644 --- a/deploy/ctc_decoders.h +++ b/deploy/ctc_decoders.h @@ -28,7 +28,7 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob=1.0, - LmScorer *ext_scorer=NULL, + Scorer *ext_scorer=NULL, bool nproc=false ); diff --git a/deploy/ctc_decoders.i b/deploy/ctc_decoders.i index c7d05238e..8c9dd1643 100644 --- a/deploy/ctc_decoders.i +++ b/deploy/ctc_decoders.i @@ -19,4 +19,5 @@ namespace std{ } %import scorer.h +%import decoder_utils.h %include "ctc_decoders.h" diff --git a/deploy/decoder_setup.py b/deploy/decoder_setup.py index aed45faaf..146538f55 100644 --- a/deploy/decoder_setup.py +++ b/deploy/decoder_setup.py @@ -39,8 +39,10 @@ os.system('swig -python -c++ ./ctc_decoders.i') ctc_beam_search_decoder_module = [ Extension( name='_swig_ctc_decoders', - sources=FILES + - ['scorer.cpp', 'ctc_decoders_wrap.cxx', 'ctc_decoders.cpp'], + sources=FILES + [ + 'scorer.cpp', 'ctc_decoders_wrap.cxx', 'ctc_decoders.cpp', + 'decoder_utils.cpp' + ], language='C++', include_dirs=['.', './kenlm'], libraries=LIBS, diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp new file mode 100644 index 000000000..82e4cd146 --- /dev/null +++ b/deploy/decoder_utils.cpp @@ -0,0 +1,5 @@ +#include +#include +#include +#include "decoder_utils.h" + diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h new file mode 100644 index 000000000..6d58bf1f3 --- /dev/null +++ b/deploy/decoder_utils.h @@ -0,0 +1,15 @@ +#ifndef DECODER_UTILS_H +#define DECODER_UTILS_H +#pragma once +#include + +/* +template +bool pair_comp_first_rev(const std::pair a, const std::pair b); + +template +bool pair_comp_second_rev(const std::pair a, const std::pair b); + +template T log_sum_exp(T x, T y); +*/ +#endif // DECODER_UTILS_H diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 7a66daad9..e9a74b989 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -7,7 +7,7 @@ using namespace lm::ngram; -LmScorer::LmScorer(float alpha, float beta, std::string lm_model_path) { +Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { this->_alpha = alpha; this->_beta = beta; @@ -18,7 +18,7 @@ LmScorer::LmScorer(float alpha, float beta, std::string lm_model_path) { this->_language_model = LoadVirtual(lm_model_path.c_str()); } -LmScorer::~LmScorer(){ +Scorer::~Scorer(){ delete (lm::base::Model *)this->_language_model; } @@ -57,7 +57,7 @@ inline void strip(std::string &str, char ch=' ') { } } -int LmScorer::word_count(std::string sentence) { +int Scorer::word_count(std::string sentence) { strip(sentence); int cnt = 1; for (int i=0; i_language_model; State state, out_state; 
lm::FullScoreReturn ret; @@ -84,12 +84,12 @@ double LmScorer::language_model_score(std::string sentence) { return log_prob; } -void LmScorer::reset_params(float alpha, float beta) { +void Scorer::reset_params(float alpha, float beta) { this->_alpha = alpha; this->_beta = beta; } -double LmScorer::get_score(std::string sentence, bool log) { +double Scorer::get_score(std::string sentence, bool log) { double lm_score = language_model_score(sentence); int word_cnt = word_count(sentence); diff --git a/deploy/scorer.h b/deploy/scorer.h index 90a1a84a0..a18e119bc 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -8,10 +8,10 @@ * count and language model scoring. * Example: - * LmScorer ext_scorer(alpha, beta, "path_to_language_model.klm"); + * Scorer ext_scorer(alpha, beta, "path_to_language_model.klm"); * double score = ext_scorer.get_score("sentence_to_score"); */ -class LmScorer{ +class Scorer{ private: float _alpha; float _beta; @@ -23,9 +23,9 @@ private: double language_model_score(std::string); public: - LmScorer(){} - LmScorer(float alpha, float beta, std::string lm_model_path); - ~LmScorer(); + Scorer(){} + Scorer(float alpha, float beta, std::string lm_model_path); + ~Scorer(); // reset params alpha & beta void reset_params(float alpha, float beta); diff --git a/deploy/swig_decoders.py b/deploy/swig_decoders.py index 8e4a39252..0247c0c9e 100644 --- a/deploy/swig_decoders.py +++ b/deploy/swig_decoders.py @@ -4,7 +4,8 @@ from __future__ import division from __future__ import print_function import swig_ctc_decoders -import multiprocessing +#import multiprocessing +from pathos.multiprocessing import Pool def ctc_best_path_decoder(probs_seq, vocabulary): @@ -73,14 +74,37 @@ def ctc_beam_search_decoder_batch(probs_split, if not num_processes > 0: raise ValueError("Number of processes must be positive!") - pool = multiprocessing.Pool(processes=num_processes) + pool = Pool(processes=num_processes) results = [] + args_list = [] for i, probs_list in enumerate(probs_split): args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, ext_scoring_func) + args_list.append(args) results.append(pool.apply_async(ctc_beam_search_decoder, args)) pool.close() pool.join() beam_search_results = [result.get() for result in results] + """ + len_args = len(probs_split) + beam_search_results = pool.map(ctc_beam_search_decoder, + probs_split, + [beam_size for i in xrange(len_args)], + [vocabulary for i in xrange(len_args)], + [blank_id for i in xrange(len_args)], + [cutoff_prob for i in xrange(len_args)], + [ext_scoring_func for i in xrange(len_args)] + ) + """ + ''' + processes = [mp.Process(target=ctc_beam_search_decoder, + args=(probs_list, beam_size, vocabulary, blank_id, cutoff_prob, + ext_scoring_func) for probs_list in probs_split] + for p in processes: + p.start() + for p in processes: + p.join() + beam_search_results = [] + ''' return beam_search_results From d1189a7950468d2252e9a99206dcac8f09e9ac75 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 22 Aug 2017 18:52:49 +0800 Subject: [PATCH 148/335] refine wrapper for swig and simplify setup --- deploy.py | 6 +-- deploy/README.md | 11 ++-- deploy/{ctc_decoders.i => decoders.i} | 5 +- deploy/scorer.i | 8 --- deploy/scorer_setup.py | 54 ------------------- deploy/{decoder_setup.py => setup.py} | 17 +++--- deploy/setup.sh | 11 ---- ...g_decoders.py => swig_decoders_wrapper.py} | 52 ++++++++---------- 8 files changed, 40 insertions(+), 124 deletions(-) rename deploy/{ctc_decoders.i => decoders.i} (91%) delete mode 100644 deploy/scorer.i delete 
mode 100644 deploy/scorer_setup.py rename deploy/{decoder_setup.py => setup.py} (75%) delete mode 100644 deploy/setup.sh rename deploy/{swig_decoders.py => swig_decoders_wrapper.py} (68%) diff --git a/deploy.py b/deploy.py index 091d82892..2d29973fb 100644 --- a/deploy.py +++ b/deploy.py @@ -10,8 +10,7 @@ import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 -from deploy.swig_decoders import * -from swig_scorer import Scorer +from deploy.swig_decoders_wrapper import * from error_rate import wer import utils import time @@ -164,7 +163,8 @@ def infer(): ] # external scorer - ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) + ext_scorer = Scorer( + alpha=args.alpha, beta=args.beta, model_path=args.language_model_path) ## decode and print time_begin = time.time() diff --git a/deploy/README.md b/deploy/README.md index c8dbd1c12..cf0c04391 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -1,19 +1,16 @@ ### Installation -The setup of the decoder for deployment depends on the source code of [kenlm](https://github.com/kpu/kenlm/), first clone it to current directory (i.e., `deep_speech_2/deploy`) +The setup of the decoder for deployment depends on the source code of [kenlm](https://github.com/kpu/kenlm/) and [openfst](http://www.openfst.org/twiki/bin/view/FST/WebHome), first clone kenlm and download openfst to current directory (i.e., `deep_speech_2/deploy`) ```shell git clone https://github.com/kpu/kenlm.git +wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz +tar -xzvf openfst-1.6.3.tar.gz ``` Then run the setup ```shell -sh setup.sh -``` - -After the installation succeeds, go back to the parent directory - -``` +python setup.py install cd .. 
``` diff --git a/deploy/ctc_decoders.i b/deploy/decoders.i similarity index 91% rename from deploy/ctc_decoders.i rename to deploy/decoders.i index 8c9dd1643..04736e09e 100644 --- a/deploy/ctc_decoders.i +++ b/deploy/decoders.i @@ -1,5 +1,6 @@ -%module swig_ctc_decoders +%module swig_decoders %{ +#include "scorer.h" #include "ctc_decoders.h" %} @@ -18,6 +19,6 @@ namespace std{ %template(PairDoubleStringVector) std::vector >; } -%import scorer.h %import decoder_utils.h +%include "scorer.h" %include "ctc_decoders.h" diff --git a/deploy/scorer.i b/deploy/scorer.i deleted file mode 100644 index 8380e15a6..000000000 --- a/deploy/scorer.i +++ /dev/null @@ -1,8 +0,0 @@ -%module swig_scorer -%{ -#include "scorer.h" -%} - -%include "std_string.i" - -%include "scorer.h" diff --git a/deploy/scorer_setup.py b/deploy/scorer_setup.py deleted file mode 100644 index 3bb582724..000000000 --- a/deploy/scorer_setup.py +++ /dev/null @@ -1,54 +0,0 @@ -from setuptools import setup, Extension -import glob -import platform -import os - - -def compile_test(header, library): - dummy_path = os.path.join(os.path.dirname(__file__), "dummy") - command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\"" - return os.system(command) == 0 - - -FILES = glob.glob('kenlm/util/*.cc') + glob.glob('kenlm/lm/*.cc') + glob.glob( - 'kenlm/util/double-conversion/*.cc') -FILES = [ - fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) -] - -LIBS = ['stdc++'] -if platform.system() != 'Darwin': - LIBS.append('rt') - -ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6'] - -if compile_test('zlib.h', 'z'): - ARGS.append('-DHAVE_ZLIB') - LIBS.append('z') - -if compile_test('bzlib.h', 'bz2'): - ARGS.append('-DHAVE_BZLIB') - LIBS.append('bz2') - -if compile_test('lzma.h', 'lzma'): - ARGS.append('-DHAVE_XZLIB') - LIBS.append('lzma') - -os.system('swig -python -c++ ./scorer.i') - -ext_modules = [ - Extension( - name='_swig_scorer', - sources=FILES + ['scorer_wrap.cxx', 'scorer.cpp'], - language='C++', - include_dirs=['.', './kenlm'], - libraries=LIBS, - extra_compile_args=ARGS) -] - -setup( - name='swig_scorer', - version='0.1', - ext_modules=ext_modules, - include_package_data=True, - py_modules=['swig_scorer'], ) diff --git a/deploy/decoder_setup.py b/deploy/setup.py similarity index 75% rename from deploy/decoder_setup.py rename to deploy/setup.py index 146538f55..077cabd08 100644 --- a/deploy/decoder_setup.py +++ b/deploy/setup.py @@ -20,7 +20,7 @@ LIBS = ['stdc++'] if platform.system() != 'Darwin': LIBS.append('rt') -ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6'] +ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11'] if compile_test('zlib.h', 'z'): ARGS.append('-DHAVE_ZLIB') @@ -34,24 +34,21 @@ if compile_test('lzma.h', 'lzma'): ARGS.append('-DHAVE_XZLIB') LIBS.append('lzma') -os.system('swig -python -c++ ./ctc_decoders.i') +os.system('swig -python -c++ ./decoders.i') ctc_beam_search_decoder_module = [ Extension( - name='_swig_ctc_decoders', - sources=FILES + [ - 'scorer.cpp', 'ctc_decoders_wrap.cxx', 'ctc_decoders.cpp', - 'decoder_utils.cpp' - ], + name='_swig_decoders', + sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'), language='C++', - include_dirs=['.', './kenlm'], + include_dirs=['.', './kenlm', './openfst-1.6.3/src/include'], libraries=LIBS, extra_compile_args=ARGS) ] setup( - name='swig_ctc_decoders', + name='swig_decoders', version='0.1', description="""CTC decoders""", 
ext_modules=ctc_beam_search_decoder_module, - py_modules=['swig_ctc_decoders'], ) + py_modules=['swig_decoders'], ) diff --git a/deploy/setup.sh b/deploy/setup.sh deleted file mode 100644 index 423f5b892..000000000 --- a/deploy/setup.sh +++ /dev/null @@ -1,11 +0,0 @@ -echo "Run decoder setup ..." - -python decoder_setup.py install -rm -r ./build - -echo "Run scorer setup ..." - -python scorer_setup.py install -rm -r ./build - -echo "Finish the installation of decoder and scorer." diff --git a/deploy/swig_decoders.py b/deploy/swig_decoders_wrapper.py similarity index 68% rename from deploy/swig_decoders.py rename to deploy/swig_decoders_wrapper.py index 0247c0c9e..54c430147 100644 --- a/deploy/swig_decoders.py +++ b/deploy/swig_decoders_wrapper.py @@ -3,9 +3,25 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import swig_ctc_decoders -#import multiprocessing -from pathos.multiprocessing import Pool +import swig_decoders +import multiprocessing + + +class Scorer(swig_decoders.Scorer): + """Wrapper for Scorer. + + :param alpha: Parameter associated with language model. Don't use + language model when alpha = 0. + :type alpha: float + :param beta: Parameter associated with word count. Don't use word + count when beta = 0. + :type beta: float + :model_path: Path to load language model. + :type model_path: basestring + """ + + def __init__(self, alpha, beta, model_path): + swig_decoders.Scorer.__init__(self, alpha, beta, model_path) def ctc_best_path_decoder(probs_seq, vocabulary): @@ -20,8 +36,7 @@ def ctc_best_path_decoder(probs_seq, vocabulary): :return: Decoding result string. :rtype: basestring """ - return swig_ctc_decoders.ctc_best_path_decoder(probs_seq.tolist(), - vocabulary) + return swig_decoders.ctc_best_path_decoder(probs_seq.tolist(), vocabulary) def ctc_beam_search_decoder( @@ -54,9 +69,9 @@ def ctc_beam_search_decoder( results, in descending order of the probability. 
:rtype: list """ - return swig_ctc_decoders.ctc_beam_search_decoder( - probs_seq.tolist(), beam_size, vocabulary, blank_id, cutoff_prob, - ext_scoring_func) + return swig_decoders.ctc_beam_search_decoder(probs_seq.tolist(), beam_size, + vocabulary, blank_id, + cutoff_prob, ext_scoring_func) def ctc_beam_search_decoder_batch(probs_split, @@ -86,25 +101,4 @@ def ctc_beam_search_decoder_batch(probs_split, pool.close() pool.join() beam_search_results = [result.get() for result in results] - """ - len_args = len(probs_split) - beam_search_results = pool.map(ctc_beam_search_decoder, - probs_split, - [beam_size for i in xrange(len_args)], - [vocabulary for i in xrange(len_args)], - [blank_id for i in xrange(len_args)], - [cutoff_prob for i in xrange(len_args)], - [ext_scoring_func for i in xrange(len_args)] - ) - """ - ''' - processes = [mp.Process(target=ctc_beam_search_decoder, - args=(probs_list, beam_size, vocabulary, blank_id, cutoff_prob, - ext_scoring_func) for probs_list in probs_split] - for p in processes: - p.start() - for p in processes: - p.join() - beam_search_results = [] - ''' return beam_search_results From dad406a49bffc8c59655482ace9d949a7e6bef16 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 23 Aug 2017 11:03:44 +0800 Subject: [PATCH 149/335] add the support of parallel beam search decoding in deployment --- deploy.py | 31 ++++++++++++---- deploy/README.md | 15 +++++++- deploy/ctc_decoders.cpp | 44 +++++++++++++++++++++-- deploy/ctc_decoders.h | 53 +++++++++++++++++++-------- deploy/decoders.i | 2 ++ deploy/setup.py | 6 ++-- deploy/swig_decoders_wrapper.py | 64 ++++++++++++++++++--------------- 7 files changed, 160 insertions(+), 55 deletions(-) diff --git a/deploy.py b/deploy.py index 2d29973fb..76b616052 100644 --- a/deploy.py +++ b/deploy.py @@ -18,7 +18,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=10, + default=32, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -46,6 +46,11 @@ parser.add_argument( default=multiprocessing.cpu_count(), type=int, help="Number of cpu threads for preprocessing data. (default: %(default)s)") +parser.add_argument( + "--num_processes_beam_search", + default=multiprocessing.cpu_count(), + type=int, + help="Number of cpu processes for beam search. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -70,8 +75,8 @@ parser.add_argument( "--decode_method", default='beam_search', type=str, - help="Method for ctc decoding: best_path or beam_search. (default: %(default)s)" -) + help="Method for ctc decoding: beam_search or beam_search_batch. 
" + "(default: %(default)s)") parser.add_argument( "--beam_size", default=200, @@ -169,15 +174,28 @@ def infer(): ## decode and print time_begin = time.time() wer_sum, wer_counter = 0, 0 - for i, probs in enumerate(probs_split): - beam_result = ctc_beam_search_decoder( - probs_seq=probs, + batch_beam_results = [] + if args.decode_method == 'beam_search': + for i, probs in enumerate(probs_split): + beam_result = ctc_beam_search_decoder( + probs_seq=probs, + beam_size=args.beam_size, + vocabulary=data_generator.vocab_list, + blank_id=len(data_generator.vocab_list), + cutoff_prob=args.cutoff_prob, + ext_scoring_func=ext_scorer, ) + batch_beam_results += [beam_result] + else: + batch_beam_results = ctc_beam_search_decoder_batch( + probs_split=probs_split, beam_size=args.beam_size, vocabulary=data_generator.vocab_list, blank_id=len(data_generator.vocab_list), + num_processes=args.num_processes_beam_search, cutoff_prob=args.cutoff_prob, ext_scoring_func=ext_scorer, ) + for i, beam_result in enumerate(batch_beam_results): print("\nTarget Transcription:\t%s" % target_transcription[i]) print("Beam %d: %f \t%s" % (0, beam_result[0][0], beam_result[0][1])) wer_cur = wer(target_transcription[i], beam_result[0][1]) @@ -185,6 +203,7 @@ def infer(): wer_counter += 1 print("cur wer = %f , average wer = %f" % (wer_cur, wer_sum / wer_counter)) + time_end = time.time() print("total time = %f" % (time_end - time_begin)) diff --git a/deploy/README.md b/deploy/README.md index cf0c04391..98dde7a60 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -1,12 +1,25 @@ ### Installation -The setup of the decoder for deployment depends on the source code of [kenlm](https://github.com/kpu/kenlm/) and [openfst](http://www.openfst.org/twiki/bin/view/FST/WebHome), first clone kenlm and download openfst to current directory (i.e., `deep_speech_2/deploy`) +The build of the decoder for deployment depends on several open-sourced projects, first clone or download them to current directory (i.e., `deep_speech_2/deploy`) + +- [**KenLM**](https://github.com/kpu/kenlm/): Faster and Smaller Language Model Queries ```shell git clone https://github.com/kpu/kenlm.git +``` + +- [**OpenFst**](http://www.openfst.org/twiki/bin/view/FST/WebHome): A library for finite-state transducers + +```shell wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz tar -xzvf openfst-1.6.3.tar.gz ``` +- [**ThreadPool**](http://progsch.net/wordpress/): A library for C++ thread pool + +```shell +git clone https://github.com/progschj/ThreadPool.git +``` + Then run the setup ```shell diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 75555c018..b22a45a70 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -6,6 +6,7 @@ #include #include "ctc_decoders.h" #include "decoder_utils.h" +#include "ThreadPool.h" typedef double log_prob_type; @@ -33,7 +34,8 @@ T log_sum_exp(T x, T y) } std::string ctc_best_path_decoder(std::vector > probs_seq, - std::vector vocabulary) { + std::vector vocabulary) +{ // dimension check int num_time_steps = probs_seq.size(); for (int i=0; i > std::vector vocabulary, int blank_id, double cutoff_prob, - Scorer *ext_scorer, - bool nproc) { + Scorer *ext_scorer) +{ // dimension check int num_time_steps = probs_seq.size(); for (int i=0; i > pair_comp_first_rev); return beam_result; } + + +std::vector>> + ctc_beam_search_decoder_batch( + std::vector>> probs_split, + int beam_size, + std::vector vocabulary, + int blank_id, + int num_processes, + double cutoff_prob, + Scorer *ext_scorer + 
) +{ + if (num_processes <= 0) { + std::cout << "num_processes must be nonnegative!" << std::endl; + exit(1); + } + // thread pool + ThreadPool pool(num_processes); + // number of samples + int batch_size = probs_split.size(); + // enqueue the tasks of decoding + std::vector>>> res; + for (int i = 0; i < batch_size; i++) { + res.emplace_back( + pool.enqueue(ctc_beam_search_decoder, probs_split[i], + beam_size, vocabulary, blank_id, cutoff_prob, ext_scorer) + ); + } + // get decoding results + std::vector>> batch_results; + for (int i = 0; i < batch_size; i++) { + batch_results.emplace_back(res[i].get()); + } + return batch_results; +} diff --git a/deploy/ctc_decoders.h b/deploy/ctc_decoders.h index 50a6014f0..238903820 100644 --- a/deploy/ctc_decoders.h +++ b/deploy/ctc_decoders.h @@ -6,8 +6,20 @@ #include #include "scorer.h" -/* CTC Beam Search Decoder, the interface is consistent with the - * original decoder in Python version. +/* CTC Best Path Decoder + * + * Parameters: + * probs_seq: 2-D vector that each element is a vector of probabilities + * over vocabulary of one time step. + * vocabulary: A vector of vocabulary. + * Return: + * A vector that each element is a pair of score and decoding result, + * in desending order. + */ +std::string ctc_best_path_decoder(std::vector > probs_seq, + std::vector vocabulary); + +/* CTC Beam Search Decoder * Parameters: * probs_seq: 2-D vector that each element is a vector of probabilities @@ -17,7 +29,6 @@ * blank_id: ID of blank. * cutoff_prob: Cutoff probability of pruning * ext_scorer: External scorer to evaluate a prefix. - * nproc: Whether this function used in multiprocessing. * Return: * A vector that each element is a pair of score and decoding result, * in desending order. @@ -28,21 +39,35 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob=1.0, - Scorer *ext_scorer=NULL, - bool nproc=false + Scorer *ext_scorer=NULL ); -/* CTC Best Path Decoder - * +/* CTC Beam Search Decoder for batch data, the interface is consistent with the + * original decoder in Python version. + * Parameters: - * probs_seq: 2-D vector that each element is a vector of probabilities - * over vocabulary of one time step. + * probs_seq: 3-D vector that each element is a 2-D vector that can be used + * by ctc_beam_search_decoder(). + * . + * beam_size: The width of beam search. * vocabulary: A vector of vocabulary. + * blank_id: ID of blank. + * num_processes: Number of threads for beam search. + * cutoff_prob: Cutoff probability of pruning + * ext_scorer: External scorer to evaluate a prefix. * Return: - * A vector that each element is a pair of score and decoding result, - * in desending order. - */ -std::string ctc_best_path_decoder(std::vector > probs_seq, - std::vector vocabulary); + * A 2-D vector that each element is a vector of decoding result for one + * sample. 
+*/ +std::vector>> + ctc_beam_search_decoder_batch(std::vector>> probs_split, + int beam_size, + std::vector vocabulary, + int blank_id, + int num_processes, + double cutoff_prob=1.0, + Scorer *ext_scorer=NULL + ); + #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/deploy/decoders.i b/deploy/decoders.i index 04736e09e..34da1eca6 100644 --- a/deploy/decoders.i +++ b/deploy/decoders.i @@ -17,6 +17,8 @@ namespace std{ %template(Pair) std::pair; %template(PairFloatStringVector) std::vector >; %template(PairDoubleStringVector) std::vector >; + %template(PairDoubleStringVector2) std::vector > >; + %template(DoubleVector3) std::vector > >; } %import decoder_utils.h diff --git a/deploy/setup.py b/deploy/setup.py index 077cabd08..1342478b2 100644 --- a/deploy/setup.py +++ b/deploy/setup.py @@ -36,12 +36,12 @@ if compile_test('lzma.h', 'lzma'): os.system('swig -python -c++ ./decoders.i') -ctc_beam_search_decoder_module = [ +decoders_module = [ Extension( name='_swig_decoders', sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'), language='C++', - include_dirs=['.', './kenlm', './openfst-1.6.3/src/include'], + include_dirs=['.', 'kenlm', 'openfst-1.6.3/src/include', 'ThreadPool'], libraries=LIBS, extra_compile_args=ARGS) ] @@ -50,5 +50,5 @@ setup( name='swig_decoders', version='0.1', description="""CTC decoders""", - ext_modules=ctc_beam_search_decoder_module, + ext_modules=decoders_module, py_modules=['swig_decoders'], ) diff --git a/deploy/swig_decoders_wrapper.py b/deploy/swig_decoders_wrapper.py index 54c430147..51f3173b2 100644 --- a/deploy/swig_decoders_wrapper.py +++ b/deploy/swig_decoders_wrapper.py @@ -4,7 +4,6 @@ from __future__ import division from __future__ import print_function import swig_decoders -import multiprocessing class Scorer(swig_decoders.Scorer): @@ -39,14 +38,13 @@ def ctc_best_path_decoder(probs_seq, vocabulary): return swig_decoders.ctc_best_path_decoder(probs_seq.tolist(), vocabulary) -def ctc_beam_search_decoder( - probs_seq, - beam_size, - vocabulary, - blank_id, - cutoff_prob=1.0, - ext_scoring_func=None, ): - """Wrapper for CTC Beam Search Decoder. +def ctc_beam_search_decoder(probs_seq, + beam_size, + vocabulary, + blank_id, + cutoff_prob=1.0, + ext_scoring_func=None): + """Wrapper for the CTC Beam Search Decoder. :param probs_seq: 2-D list of probability distributions over each time step, with each element being a list of normalized @@ -81,24 +79,34 @@ def ctc_beam_search_decoder_batch(probs_split, num_processes, cutoff_prob=1.0, ext_scoring_func=None): - """Wrapper for CTC beam search decoder in batch - """ - - # TODO: to resolve PicklingError - - if not num_processes > 0: - raise ValueError("Number of processes must be positive!") + """Wrapper for the batched CTC beam search decoder. - pool = Pool(processes=num_processes) - results = [] - args_list = [] - for i, probs_list in enumerate(probs_split): - args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, - ext_scoring_func) - args_list.append(args) - results.append(pool.apply_async(ctc_beam_search_decoder, args)) + :param probs_seq: 3-D list with each element as an instance of 2-D list + of probabilities used by ctc_beam_search_decoder(). + :type probs_seq: 3-D list + :param beam_size: Width for beam search. + :type beam_size: int + :param vocabulary: Vocabulary list. + :type vocabulary: list + :param blank_id: ID of blank. + :type blank_id: int + :param num_processes: Number of parallel processes. 
+ :type num_processes: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :param num_processes: Number of parallel processes. + :type num_processes: int + :type cutoff_prob: float + :param ext_scoring_func: External scoring function for + partially decoded sentence, e.g. word count + or language model. + :type external_scoring_function: callable + :return: List of tuples of log probability and sentence as decoding + results, in descending order of the probability. + :rtype: list + """ + probs_split = [probs_seq.tolist() for probs_seq in probs_split] - pool.close() - pool.join() - beam_search_results = [result.get() for result in results] - return beam_search_results + return swig_decoders.ctc_beam_search_decoder_batch( + probs_split, beam_size, vocabulary, blank_id, num_processes, + cutoff_prob, ext_scoring_func) From 3ee020397cafca64cace4c71123c53b4fe8999a0 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 23 Aug 2017 11:06:27 +0800 Subject: [PATCH 150/335] Refactor scorer and move utility functions to decoder_util.h --- deploy/README.md | 2 + deploy/ctc_decoders.cpp | 23 ------ deploy/decoder_utils.cpp | 7 ++ deploy/decoder_utils.h | 33 ++++++--- deploy/decoders.i | 9 ++- deploy/scorer.cpp | 148 ++++++++++++++++++--------------------- deploy/scorer.h | 69 ++++++++++++------ 7 files changed, 154 insertions(+), 137 deletions(-) diff --git a/deploy/README.md b/deploy/README.md index cf0c04391..162a396a4 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -7,6 +7,8 @@ wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz tar -xzvf openfst-1.6.3.tar.gz ``` +Compiling for python interface requires swig, please make sure swig being installed. + Then run the setup ```shell diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 75555c018..836fb435d 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -9,29 +9,6 @@ typedef double log_prob_type; - -template -bool pair_comp_first_rev(const std::pair a, const std::pair b) -{ - return a.first > b.first; -} - -template -bool pair_comp_second_rev(const std::pair a, const std::pair b) -{ - return a.second > b.second; -} - -template -T log_sum_exp(T x, T y) -{ - static T num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - T xmax = std::max(x, y); - return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; -} - std::string ctc_best_path_decoder(std::vector > probs_seq, std::vector vocabulary) { // dimension check diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp index 82e4cd146..d616d7c66 100644 --- a/deploy/decoder_utils.cpp +++ b/deploy/decoder_utils.cpp @@ -3,3 +3,10 @@ #include #include "decoder_utils.h" +size_t get_utf8_str_len(const std::string& str) { + size_t str_len = 0; + for (char c : str) { + str_len += ((c & 0xc0) != 0x80); + } + return str_len; +} diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h index 6d58bf1f3..9419e005a 100644 --- a/deploy/decoder_utils.h +++ b/deploy/decoder_utils.h @@ -1,15 +1,32 @@ -#ifndef DECODER_UTILS_H -#define DECODER_UTILS_H -#pragma once +#ifndef DECODER_UTILS_H_ +#define DECODER_UTILS_H_ + #include -/* template -bool pair_comp_first_rev(const std::pair a, const std::pair b); +bool pair_comp_first_rev(const std::pair &a, const std::pair &b) +{ + return a.first > b.first; +} template -bool pair_comp_second_rev(const std::pair a, const std::pair b); +bool pair_comp_second_rev(const std::pair &a, const std::pair &b) +{ + return a.second > 
b.second; +} + +template +T log_sum_exp(const T &x, const T &y) +{ + static T num_min = -std::numeric_limits::max(); + if (x <= num_min) return y; + if (y <= num_min) return x; + T xmax = std::max(x, y); + return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; +} + +// Get length of utf8 encoding string +// See: http://stackoverflow.com/a/4063229 +size_t get_utf8_str_len(const std::string& str); -template T log_sum_exp(T x, T y); -*/ #endif // DECODER_UTILS_H diff --git a/deploy/decoders.i b/deploy/decoders.i index 04736e09e..ed7c85e67 100644 --- a/deploy/decoders.i +++ b/deploy/decoders.i @@ -2,13 +2,15 @@ %{ #include "scorer.h" #include "ctc_decoders.h" +#include "decoder_utils.h" %} %include "std_vector.i" %include "std_pair.i" %include "std_string.i" +%import "decoder_utils.h" -namespace std{ +namespace std { %template(DoubleVector) std::vector; %template(IntVector) std::vector; %template(StringVector) std::vector; @@ -19,6 +21,9 @@ namespace std{ %template(PairDoubleStringVector) std::vector >; } -%import decoder_utils.h +%template(IntDoublePairCompSecondRev) pair_comp_second_rev; +%template(StringDoublePairCompSecondRev) pair_comp_second_rev; +%template(DoubleStringPairCompFirstRev) pair_comp_first_rev; + %include "scorer.h" %include "ctc_decoders.h" diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index e9a74b989..17bb6e10d 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -1,103 +1,89 @@ #include #include #include "scorer.h" -#include "lm/model.hh" -#include "util/tokenize_piece.hh" -#include "util/string_piece.hh" +#include "decoder_utils.h" -using namespace lm::ngram; - -Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { - this->_alpha = alpha; - this->_beta = beta; - - if (access(lm_model_path.c_str(), F_OK) != 0) { - std::cout<<"Invalid language model path!"<_language_model = LoadVirtual(lm_model_path.c_str()); +Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { + this->alpha = alpha; + this->beta = beta; + _is_character_based = true; + _language_model = nullptr; + _max_order = 0; + // load language model + load_LM(lm_path.c_str()); } -Scorer::~Scorer(){ - delete (lm::base::Model *)this->_language_model; +Scorer::~Scorer() { + if (_language_model != nullptr) + delete static_cast(_language_model); } -/* Strip a input sentence - * Parameters: - * str: A reference to the objective string - * ch: The character to prune - * Return: - * void - */ -inline void strip(std::string &str, char ch=' ') { - if (str.size() == 0) return; - int start = 0; - int end = str.size()-1; - for (int i=0; i=0; i--) { - if (str[i] == ch) { - end --; - } else { - break; + RetriveStrEnumerateVocab enumerate; + Config config; + config.enumerate_vocab = &enumerate; + _language_model = lm::ngram::LoadVirtual(filename, config); + _max_order = static_cast(_language_model)->Order(); + _vocabulary = enumerate.vocabulary; + for (size_t i = 0; i < _vocabulary.size(); ++i) { + if (_is_character_based + && _vocabulary[i] != UNK_TOKEN + && _vocabulary[i] != START_TOKEN + && _vocabulary[i] != END_TOKEN + && get_utf8_str_len(enumerate.vocabulary[i]) > 1) { + _is_character_based = false; } } - - if (start == 0 && end == str.size()-1) return; - if (start > end) { - std::string emp_str; - str = emp_str; - } else { - str = str.substr(start, end-start+1); - } } -int Scorer::word_count(std::string sentence) { - strip(sentence); - int cnt = 1; - for (int i=0; i& words) { + lm::base::Model* model = static_cast(_language_model); + double cond_prob; + State state, tmp_state, 
out_state; + // avoid to inserting in begin + model->NullContextWrite(&state); + for (size_t i = 0; i < words.size(); ++i) { + lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]); + // encounter OOV + if (word_index == 0) { + return OOV_SCOER; } - } - return cnt; -} - -double Scorer::language_model_score(std::string sentence) { - lm::base::Model *model = (lm::base::Model *)this->_language_model; - State state, out_state; - lm::FullScoreReturn ret; - model->BeginSentenceWrite(&state); - - for (util::TokenIter it(sentence, ' '); it; ++it){ - lm::WordIndex wid = model->BaseVocabulary().Index(*it); - ret = model->BaseFullScore(&state, wid, &out_state); + cond_prob = model->BaseScore(&state, word_index, &out_state); + tmp_state = state; state = out_state; + out_state = tmp_state; } - //log10 prob - double log_prob = ret.prob; - return log_prob; + // log10 prob + return cond_prob; } -void Scorer::reset_params(float alpha, float beta) { - this->_alpha = alpha; - this->_beta = beta; +double Scorer::get_sent_log_prob(const std::vector& words) { + std::vector sentence; + if (words.size() == 0) { + for (size_t i = 0; i < _max_order; ++i) { + sentence.push_back(START_TOKEN); + } + } else { + for (size_t i = 0; i < _max_order - 1; ++i) { + sentence.push_back(START_TOKEN); + } + sentence.insert(sentence.end(), words.begin(), words.end()); + } + sentence.push_back(END_TOKEN); + return get_log_prob(sentence); } -double Scorer::get_score(std::string sentence, bool log) { - double lm_score = language_model_score(sentence); - int word_cnt = word_count(sentence); - - double final_score = 0.0; - if (log == false) { - final_score = pow(10, _alpha*lm_score) * pow(word_cnt, _beta); - } else { - final_score = _alpha*lm_score*std::log(10) + _beta*std::log(word_cnt); +double Scorer::get_log_prob(const std::vector& words) { + assert(words.size() > _max_order); + double score = 0.0; + for (size_t i = 0; i < words.size() - _max_order + 1; ++i) { + std::vector ngram(words.begin() + i, + words.begin() + i + _max_order); + score += get_log_cond_prob(ngram); } - return final_score; + return score; } diff --git a/deploy/scorer.h b/deploy/scorer.h index a18e119bc..a650d3753 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -2,35 +2,58 @@ #define SCORER_H_ #include +#include +#include +#include "lm/enumerate_vocab.hh" +#include "lm/word_index.hh" +#include "lm/virtual_interface.hh" +#include "util/string_piece.hh" -/* External scorer to evaluate a prefix or a complete sentence - * when a new word appended during decoding, consisting of word - * count and language model scoring. +const double OOV_SCOER = -1000.0; +const std::string START_TOKEN = ""; +const std::string UNK_TOKEN = ""; +const std::string END_TOKEN = ""; - * Example: - * Scorer ext_scorer(alpha, beta, "path_to_language_model.klm"); - * double score = ext_scorer.get_score("sentence_to_score"); - */ -class Scorer{ -private: - float _alpha; - float _beta; - void *_language_model; + // Implement a callback to retrive string vocabulary. +class RetriveStrEnumerateVocab : public lm::EnumerateVocab { +public: + RetriveStrEnumerateVocab() {} - // word insertion term - int word_count(std::string); - // n-gram language model scoring - double language_model_score(std::string); + void Add(lm::WordIndex index, const StringPiece& str) { + vocabulary.push_back(std::string(str.data(), str.length())); + } + + std::vector vocabulary; +}; +// External scorer to query languange score for n-gram or sentence. 
+// Example: +// Scorer scorer(alpha, beta, "path_of_language_model"); +// scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); +// scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); +class Scorer{ public: - Scorer(){} - Scorer(float alpha, float beta, std::string lm_model_path); + Scorer(double alpha, double beta, const std::string& lm_path); ~Scorer(); + double get_log_cond_prob(const std::vector& words); + double get_sent_log_prob(const std::vector& words); + size_t get_max_order() { return _max_order; } + bool is_character_based() { return _is_character_based; } + std::vector get_vocab() { return _vocabulary; } + + // expose to decoder + double alpha; + double beta; - // reset params alpha & beta - void reset_params(float alpha, float beta); - // get the final score - double get_score(std::string, bool log=false); +protected: + void load_LM(const char* filename); + double get_log_prob(const std::vector& words); + +private: + void* _language_model; + bool _is_character_based; + size_t _max_order; + std::vector _vocabulary; }; -#endif //SCORER_H_ +#endif // SCORER_H_ From 8dc0b2b0b046002454475095c2db3344cbe0fca1 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 23 Aug 2017 14:41:41 +0800 Subject: [PATCH 151/335] Make setup.py to support parallel processing. --- deploy/README.md | 2 +- deploy/scorer.cpp | 7 +++-- deploy/setup.py | 70 +++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/deploy/README.md b/deploy/README.md index 9bd55dd9a..90809ad35 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -25,7 +25,7 @@ git clone https://github.com/progschj/ThreadPool.git Then run the setup ```shell -python setup.py install +python setup.py install --num_processes 4 cd .. ``` diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 17bb6e10d..233b4766d 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -1,5 +1,8 @@ #include #include +#include "lm/config.hh" +#include "lm/state.hh" +#include "lm/model.hh" #include "scorer.h" #include "decoder_utils.h" @@ -24,7 +27,7 @@ void Scorer::load_LM(const char* filename) { exit(1); } RetriveStrEnumerateVocab enumerate; - Config config; + lm::ngram::Config config; config.enumerate_vocab = &enumerate; _language_model = lm::ngram::LoadVirtual(filename, config); _max_order = static_cast(_language_model)->Order(); @@ -43,7 +46,7 @@ void Scorer::load_LM(const char* filename) { double Scorer::get_log_cond_prob(const std::vector& words) { lm::base::Model* model = static_cast(_language_model); double cond_prob; - State state, tmp_state, out_state; + lm::ngram::State state, tmp_state, out_state; // avoid to inserting in begin model->NullContextWrite(&state); for (size_t i = 0; i < words.size(); ++i) { diff --git a/deploy/setup.py b/deploy/setup.py index 1342478b2..7a4b7e02c 100644 --- a/deploy/setup.py +++ b/deploy/setup.py @@ -1,17 +1,75 @@ -from setuptools import setup, Extension +"""Script to build and install decoder package.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from setuptools import setup, Extension, distutils import glob import platform -import os +import os, sys +import multiprocessing.pool +import argparse + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--num_processes", + default=1, + type=int, + help="Number of cpu processes to build package. 
(default: %(default)d)") +args = parser.parse_known_args() + +# reconstruct sys.argv to pass to setup below +sys.argv = [sys.argv[0]] + args[1] + + +# monkey-patch for parallel compilation +# See: https://stackoverflow.com/a/13176803 +def parallelCCompile(self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None): + # those lines are copied from distutils.ccompiler.CCompiler directly + macros, objects, extra_postargs, pp_opts, build = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs) + cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) + + # parallel code + def _single_compile(obj): + try: + src, ext = build[obj] + except KeyError: + return + self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + + # convert to list, imap is evaluated on-demand + thread_pool = multiprocessing.pool.ThreadPool(args[0].num_processes) + list(thread_pool.imap(_single_compile, objects)) + return objects def compile_test(header, library): dummy_path = os.path.join(os.path.dirname(__file__), "dummy") - command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\"" + command = "bash -c \"g++ -include " + header \ + + " -l" + library + " -x c++ - <<<'int main() {}' -o " \ + + dummy_path + " >/dev/null 2>/dev/null && rm " \ + + dummy_path + " 2>/dev/null\"" return os.system(command) == 0 -FILES = glob.glob('kenlm/util/*.cc') + glob.glob('kenlm/lm/*.cc') + glob.glob( - 'kenlm/util/double-conversion/*.cc') +# hack compile to support parallel compiling +distutils.ccompiler.CCompiler.compile = parallelCCompile + +FILES = glob.glob('kenlm/util/*.cc') \ + + glob.glob('kenlm/lm/*.cc') \ + + glob.glob('kenlm/util/double-conversion/*.cc') + +FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') + FILES = [ fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) ] @@ -40,7 +98,7 @@ decoders_module = [ Extension( name='_swig_decoders', sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'), - language='C++', + language='c++', include_dirs=['.', 'kenlm', 'openfst-1.6.3/src/include', 'ThreadPool'], libraries=LIBS, extra_compile_args=ARGS) From eef364d17c3d8e4402d95960153ebd49d539b594 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 23 Aug 2017 16:57:25 +0800 Subject: [PATCH 152/335] adapt to the last three commits --- deploy/README.md | 2 +- deploy/scorer.cpp | 85 +++++++++++++++++++++++++++++++++++++++++++++++ deploy/scorer.h | 10 +++++- 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/deploy/README.md b/deploy/README.md index 90809ad35..9f2be76e8 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -14,7 +14,7 @@ wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz tar -xzvf openfst-1.6.3.tar.gz ``` -- [**swig**]: Compiling for python interface requires swig, please make sure swig being installed. +- [**SWIG**](http://www.swig.org): Compiling for python interface requires swig, please make sure swig being installed. 
- [**ThreadPool**](http://progsch.net/wordpress/): A library for C++ thread pool diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 233b4766d..a1be7e0f6 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -3,9 +3,13 @@ #include "lm/config.hh" #include "lm/state.hh" #include "lm/model.hh" +#include "util/tokenize_piece.hh" +#include "util/string_piece.hh" #include "scorer.h" #include "decoder_utils.h" +using namespace lm::ngram; + Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { this->alpha = alpha; this->beta = beta; @@ -90,3 +94,84 @@ double Scorer::get_log_prob(const std::vector& words) { } return score; } + +/* Strip a input sentence + * Parameters: + * str: A reference to the objective string + * ch: The character to prune + * Return: + * void + */ +inline void strip(std::string &str, char ch=' ') { + if (str.size() == 0) return; + int start = 0; + int end = str.size()-1; + for (int i=0; i=0; i--) { + if (str[i] == ch) { + end --; + } else { + break; + } + } + + if (start == 0 && end == str.size()-1) return; + if (start > end) { + std::string emp_str; + str = emp_str; + } else { + str = str.substr(start, end-start+1); + } +} + +int Scorer::word_count(std::string sentence) { + strip(sentence); + int cnt = 1; + for (int i=0; i_language_model; + State state, out_state; + lm::FullScoreReturn ret; + model->BeginSentenceWrite(&state); + + for (util::TokenIter it(sentence, ' '); it; ++it){ + lm::WordIndex wid = model->BaseVocabulary().Index(*it); + ret = model->BaseFullScore(&state, wid, &out_state); + state = out_state; + } + //log10 prob + double log_prob = ret.prob; + return log_prob; +} + +void Scorer::reset_params(float alpha, float beta) { + this->alpha = alpha; + this->beta = beta; +} + +double Scorer::get_score(std::string sentence, bool log) { + double lm_score = get_log_cond_prob(sentence); + int word_cnt = word_count(sentence); + + double final_score = 0.0; + if (log == false) { + final_score = pow(10, alpha * lm_score) * pow(word_cnt, beta); + } else { + final_score = alpha * lm_score * std::log(10) + + beta * std::log(word_cnt); + } + return final_score; +} diff --git a/deploy/scorer.h b/deploy/scorer.h index a650d3753..a52420046 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -30,6 +30,7 @@ public: // Example: // Scorer scorer(alpha, beta, "path_of_language_model"); // scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); +// scorer.get_log_cond_prob("this a sentence"); // scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); class Scorer{ public: @@ -40,7 +41,14 @@ public: size_t get_max_order() { return _max_order; } bool is_character_based() { return _is_character_based; } std::vector get_vocab() { return _vocabulary; } - + // word insertion term + int word_count(std::string); + // get the log cond prob of the last word + double get_log_cond_prob(std::string); + // reset params alpha & beta + void reset_params(float alpha, float beta); + // get the final score + double get_score(std::string, bool log=false); // expose to decoder double alpha; double beta; From b56020549014396ba8eb9d1535001f51fbdf7be3 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 24 Aug 2017 11:14:56 +0800 Subject: [PATCH 153/335] convert data structure for prefix from map to trie tree --- deploy.py | 9 +- deploy/ctc_decoders.cpp | 250 ++++++++++++++++++++++----------------- deploy/decoder_utils.cpp | 70 +++++++++++ deploy/decoder_utils.h | 14 +++ deploy/path_trie.cpp | 153 ++++++++++++++++++++++++ deploy/path_trie.h | 59 +++++++++ deploy/scorer.cpp | 39 
++++++ deploy/scorer.h | 13 ++ 8 files changed, 492 insertions(+), 115 deletions(-) create mode 100644 deploy/path_trie.cpp create mode 100644 deploy/path_trie.h diff --git a/deploy.py b/deploy.py index 76b616052..833c5c20c 100644 --- a/deploy.py +++ b/deploy.py @@ -18,7 +18,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=32, + default=5, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -79,7 +79,7 @@ parser.add_argument( "(default: %(default)s)") parser.add_argument( "--beam_size", - default=200, + default=20, type=int, help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( @@ -104,7 +104,7 @@ parser.add_argument( help="Parameter associated with word count. (default: %(default)f)") parser.add_argument( "--cutoff_prob", - default=0.99, + default=1.0, type=float, help="The cutoff probability of pruning" "in beam search. (default: %(default)f)") @@ -183,7 +183,8 @@ def infer(): vocabulary=data_generator.vocab_list, blank_id=len(data_generator.vocab_list), cutoff_prob=args.cutoff_prob, - ext_scoring_func=ext_scorer, ) + # ext_scoring_func=ext_scorer, + ) batch_beam_results += [beam_result] else: batch_beam_results = ctc_beam_search_decoder_batch( diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index fd553be61..30e855258 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -4,11 +4,13 @@ #include #include #include +#include "fst/fstlib.h" #include "ctc_decoders.h" #include "decoder_utils.h" +#include "path_trie.h" #include "ThreadPool.h" -typedef double log_prob_type; +typedef float log_prob_type; std::string ctc_best_path_decoder(std::vector > probs_seq, std::vector vocabulary) @@ -89,24 +91,30 @@ std::vector > exit(1); } - // initialize - // two sets containing selected and candidate prefixes respectively - std::map prefix_set_prev, prefix_set_next; - // probability of prefixes ending with blank and non-blank - std::map log_probs_b_prev, log_probs_nb_prev; - std::map log_probs_b_cur, log_probs_nb_cur; - - static log_prob_type NUM_MAX = std::numeric_limits::max(); - prefix_set_prev["\t"] = 0.0; - log_probs_b_prev["\t"] = 0.0; - log_probs_nb_prev["\t"] = -NUM_MAX; - - for (int time_step=0; time_step prob = probs_seq[time_step]; + static log_prob_type POS_INF = std::numeric_limits::max(); + static log_prob_type NEG_INF = -POS_INF; + static log_prob_type NUM_MIN = std::numeric_limits::min(); + + // init + PathTrie root; + root._log_prob_b_prev = 0.0; + root._score = 0.0; + std::vector prefixes; + prefixes.push_back(&root); + + if ( ext_scorer != nullptr && !ext_scorer->is_character_based()) { + if (ext_scorer->dictionary == nullptr) { + // TODO: init dictionary + } + auto fst_dict = static_cast(ext_scorer->dictionary); + fst::StdVectorFst* dict_ptr = fst_dict->Copy(true); + root.set_dictionary(dict_ptr); + auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); + root.set_matcher(matcher); + } + for (int time_step = 0; time_step < num_time_steps; time_step++) { + std::vector prob = probs_seq[time_step]; std::vector > prob_idx; for (int i=0; i(i, prob[i])); @@ -132,113 +140,134 @@ std::vector > std::vector > log_prob_idx; for (int i=0; i - (prob_idx[i].first, log(prob_idx[i].second))); + (prob_idx[i].first, log(prob_idx[i].second + NUM_MIN))); } - // extend prefix - for (std::map::iterator - it = prefix_set_prev.begin(); - it != prefix_set_prev.end(); it++) { - std::string l = it->first; - if( 
prefix_set_next.find(l) == prefix_set_next.end()) { - log_probs_b_cur[l] = log_probs_nb_cur[l] = -NUM_MAX; - } + // loop over chars + for (int index = 0; index < log_prob_idx.size(); index++) { + auto c = log_prob_idx[index].first; + log_prob_type log_prob_c = log_prob_idx[index].second; + //log_prob_type log_probs_prev; - for (int index=0; index_log_prob_b_cur = log_sum_exp( + prefix->_log_prob_b_cur, + log_prob_c + prefix->_score); + continue; + } + // repeated character + if (c == prefix->_character) { + prefix->_log_prob_nb_cur = log_sum_exp( + prefix->_log_prob_nb_cur, + log_prob_c + prefix->_log_prob_nb_prev + ); + } + // get new prefix + auto prefix_new = prefix->get_path_trie(c); + + if (prefix_new != nullptr) { + float log_p = NEG_INF; + + if (c == prefix->_character + && prefix->_log_prob_b_prev > NEG_INF) { + log_p = log_prob_c + prefix->_log_prob_b_prev; + } else if (c != prefix->_character) { + log_p = log_prob_c + prefix->_score; } - if (last_char == new_char) { - log_probs_nb_cur[l_plus] = log_sum_exp( - log_probs_nb_cur[l_plus], - log_prob_c+log_probs_b_prev[l] - ); - log_probs_nb_cur[l] = log_sum_exp( - log_probs_nb_cur[l], - log_prob_c+log_probs_nb_prev[l] - ); - } else if (new_char == " ") { - float score = 0.0; - if (ext_scorer != NULL && l.size() > 1) { - score = ext_scorer->get_score(l.substr(1), true); + + // language model scoring + if (ext_scorer != nullptr && + (c == space_id || ext_scorer->is_character_based()) ) { + PathTrie *prefix_to_score = nullptr; + + // don't score the space + if (ext_scorer->is_character_based()) { + prefix_to_score = prefix_new; + } else { + prefix_to_score = prefix; } - log_probs_prev = log_sum_exp(log_probs_b_prev[l], - log_probs_nb_prev[l]); - log_probs_nb_cur[l_plus] = log_sum_exp( - log_probs_nb_cur[l_plus], - score + log_prob_c + log_probs_prev - ); - } else { - log_probs_prev = log_sum_exp(log_probs_b_prev[l], - log_probs_nb_prev[l]); - log_probs_nb_cur[l_plus] = log_sum_exp( - log_probs_nb_cur[l_plus], - log_prob_c+log_probs_prev - ); + + double score = 0.0; + std::vector ngram; + ngram = ext_scorer->make_ngram(prefix_to_score); + score = ext_scorer->get_log_cond_prob(ngram) * + ext_scorer->alpha; + + log_p += score; + log_p += ext_scorer->beta; + } - prefix_set_next[l_plus] = log_sum_exp( - log_probs_nb_cur[l_plus], - log_probs_b_cur[l_plus] - ); + prefix_new->_log_prob_nb_cur = log_sum_exp( + prefix_new->_log_prob_nb_cur, log_p); } } - prefix_set_next[l] = log_sum_exp(log_probs_b_cur[l], - log_probs_nb_cur[l]); + } // end of loop over chars + + prefixes.clear(); + // update log probabilities + root.iterate_to_vec(prefixes); + + // sort prefixes by score + if (prefixes.size() >= beam_size) { + std::nth_element(prefixes.begin(), + prefixes.begin() + beam_size, + prefixes.end(), + prefix_compare); + + for (size_t i = beam_size; i < prefixes.size(); i++) { + prefixes[i]->remove(); + } + } + } + + for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { + double approx_ctc = prefixes[i]->_score; + + // remove word insert: + std::vector output; + prefixes[i]->get_path_vec(output); + size_t prefix_length = output.size(); + // remove language model weight: + if (ext_scorer != nullptr) { + // auto words = split_labels(output); + // approx_ctc = approx_ctc - path_length * ext_scorer->beta; + // approx_ctc -= (_lm->get_sent_log_prob(words)) * ext_scorer->alpha; } - log_probs_b_prev = log_probs_b_cur; - log_probs_nb_prev = log_probs_nb_cur; - std::vector > - prefix_vec_next(prefix_set_next.begin(), - prefix_set_next.end()); - 
std::sort(prefix_vec_next.begin(), - prefix_vec_next.end(), - pair_comp_second_rev); - int num_prefixes_next = prefix_vec_next.size(); - int k = beam_size ( - prefix_vec_next.begin(), - prefix_vec_next.begin() + k - ); + prefixes[i]->_approx_ctc = approx_ctc; } - // post processing - std::vector > beam_result; - for (std::map::iterator - it = prefix_set_prev.begin(); it != prefix_set_prev.end(); it++) { - if (it->second > -NUM_MAX && it->first.size() > 1) { - log_prob_type log_prob = it->second; - std::string sentence = it->first.substr(1); - // scoring the last word - if (ext_scorer != NULL && sentence[sentence.size()-1] != ' ') { - log_prob = log_prob + ext_scorer->get_score(sentence, true); - } - if (log_prob > -NUM_MAX) { - std::pair cur_result(log_prob, sentence); - beam_result.push_back(cur_result); - } + // allow for the post processing + std::vector space_prefixes; + if (space_prefixes.empty()) { + for (size_t i = 0; i < beam_size && i< prefixes.size(); i++) { + space_prefixes.push_back(prefixes[i]); } } - // sort the result and return - std::sort(beam_result.begin(), beam_result.end(), - pair_comp_first_rev); - return beam_result; -} + + std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); + std::vector > output_vecs; + for (size_t i = 0; i < beam_size && i < space_prefixes.size(); i++) { + std::vector output; + space_prefixes[i]->get_path_vec(output); + // convert index to string + std::string output_str; + for (int j = 0; j < output.size(); j++) { + output_str += vocabulary[output[j]]; + } + std::pair output_pair(space_prefixes[i]->_score, + output_str); + output_vecs.emplace_back( + output_pair + ); + } + + return output_vecs; + } std::vector>> @@ -250,8 +279,7 @@ std::vector>> int num_processes, double cutoff_prob, Scorer *ext_scorer - ) -{ + ) { if (num_processes <= 0) { std::cout << "num_processes must be nonnegative!" 
<< std::endl; exit(1); diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp index d616d7c66..366c8d355 100644 --- a/deploy/decoder_utils.cpp +++ b/deploy/decoder_utils.cpp @@ -10,3 +10,73 @@ size_t get_utf8_str_len(const std::string& str) { } return str_len; } + +//------------------------------------------------------- +// Overriding less than operator for sorting +//------------------------------------------------------- +bool prefix_compare(const PathTrie* x, const PathTrie* y) { + if (x->_score == y->_score) { + if (x->_character == y->_character) { + return false; + } else { + return (x->_character < y->_character); + } + } else { + return x->_score > y->_score; + } +} //---------- End path_compare --------------------------- + +// -------------------------------------------------------------- +// Adds word to fst without copying entire dictionary +// -------------------------------------------------------------- +void add_word_to_fst(const std::vector& word, + fst::StdVectorFst* dictionary) { + if (dictionary->NumStates() == 0) { + fst::StdVectorFst::StateId start = dictionary->AddState(); + assert(start == 0); + dictionary->SetStart(start); + } + fst::StdVectorFst::StateId src = dictionary->Start(); + fst::StdVectorFst::StateId dst; + for (auto c : word) { + dst = dictionary->AddState(); + dictionary->AddArc(src, fst::StdArc(c, c, 0, dst)); + src = dst; + } + dictionary->SetFinal(dst, fst::StdArc::Weight::One()); +} // ------------ End of add_word_to_fst ----------------------- + +// --------------------------------------------------------- +// Adds a word to the dictionary FST based on char_map +// --------------------------------------------------------- +bool addWordToDictionary(const std::string& word, + const std::unordered_map& char_map, + bool add_space, + int SPACE, + fst::StdVectorFst* dictionary) { + /* + auto characters = UTF8_split(word); + + std::vector int_word; + + for (auto& c : characters) { + if (c == " ") { + int_word.push_back(SPACE); + } else { + auto int_c = char_map.find(c); + if (int_c != char_map.end()) { + int_word.push_back(int_c->second); + } else { + return false; // return without adding + } + } + } + + if (add_space) { + int_word.push_back(SPACE); + } + + add_word_to_fst(int_word, dictionary); + */ + return true; +} // -------------- End of addWordToDictionary ------------ diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h index 9419e005a..d5e7d1860 100644 --- a/deploy/decoder_utils.h +++ b/deploy/decoder_utils.h @@ -2,6 +2,7 @@ #define DECODER_UTILS_H_ #include +#include "path_trie.h" template bool pair_comp_first_rev(const std::pair &a, const std::pair &b) @@ -25,8 +26,21 @@ T log_sum_exp(const T &x, const T &y) return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; } +//------------------------------------------------------- +// Overriding less than operator for sorting +//------------------------------------------------------- +bool prefix_compare(const PathTrie* x, const PathTrie* y); + // Get length of utf8 encoding string // See: http://stackoverflow.com/a/4063229 size_t get_utf8_str_len(const std::string& str); +void add_word_to_fst(const std::vector& word, + fst::StdVectorFst* dictionary); + +bool addWordToDictionary(const std::string& word, + const std::unordered_map& char_map, + bool add_space, + int SPACE, + fst::StdVectorFst* dictionary); #endif // DECODER_UTILS_H diff --git a/deploy/path_trie.cpp b/deploy/path_trie.cpp new file mode 100644 index 000000000..6cf7ae515 --- /dev/null +++ b/deploy/path_trie.cpp @@ 
-0,0 +1,153 @@ +#include +#include +#include +#include +#include + +#include "path_trie.h" +#include "decoder_utils.h" + +PathTrie::PathTrie() { + float lowest = -1.0*std::numeric_limits::max(); + _log_prob_b_prev = lowest; + _log_prob_nb_prev = lowest; + _log_prob_b_cur = lowest; + _log_prob_nb_cur = lowest; + _score = lowest; + + _ROOT = -1; + _character = _ROOT; + _exists = true; + _parent = nullptr; + _dictionary = nullptr; + _dictionary_state = 0; + _has_dictionary = false; + _matcher = nullptr; // finds arcs in FST +} + +PathTrie::~PathTrie() { + for (auto child : _children) { + delete child.second; + } +} + +PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { + auto child = _children.begin(); + for (child = _children.begin(); child != _children.end(); ++child) { + if (child->first == new_char) { + break; + } + } + if ( child != _children.end() ) { + if (!child->second->_exists) { + child->second->_exists = true; + float lowest = -1.0*std::numeric_limits::max(); + child->second->_log_prob_b_prev = lowest; + child->second->_log_prob_nb_prev = lowest; + child->second->_log_prob_b_cur = lowest; + child->second->_log_prob_nb_cur = lowest; + } + return (child->second); + } else { + if (_has_dictionary) { + _matcher->SetState(_dictionary_state); + bool found = _matcher->Find(new_char); + if (!found) { + // Adding this character causes word outside dictionary + auto FSTZERO = fst::TropicalWeight::Zero(); + auto final_weight = _dictionary->Final(_dictionary_state); + bool is_final = (final_weight != FSTZERO); + if (is_final && reset) { + _dictionary_state = _dictionary->Start(); + } + return nullptr; + } else { + PathTrie* new_path = new PathTrie; + new_path->_character = new_char; + new_path->_parent = this; + new_path->_dictionary = _dictionary; + new_path->_dictionary_state = _matcher->Value().nextstate; + new_path->_has_dictionary = true; + new_path->_matcher = _matcher; + _children.push_back(std::make_pair(new_char, new_path)); + return new_path; + } + } else { + PathTrie* new_path = new PathTrie; + new_path->_character = new_char; + new_path->_parent = this; + _children.push_back(std::make_pair(new_char, new_path)); + return new_path; + } + } +} + +PathTrie* PathTrie::get_path_vec(std::vector& output) { + return get_path_vec(output, _ROOT); +} + +PathTrie* PathTrie::get_path_vec(std::vector& output, + int stop, + size_t max_steps /*= std::numeric_limits::max() */) { + if (_character == stop || + _character == _ROOT || + output.size() == max_steps) { + std::reverse(output.begin(), output.end()); + return this; + } else { + output.push_back(_character); + return _parent->get_path_vec(output, stop, max_steps); + } +} + +void PathTrie::iterate_to_vec( + std::vector& output) { + if (_exists) { + _log_prob_b_prev = _log_prob_b_cur; + _log_prob_nb_prev = _log_prob_nb_cur; + + _log_prob_b_cur = -1.0 * std::numeric_limits::max(); + _log_prob_nb_cur = -1.0 * std::numeric_limits::max(); + + _score = log_sum_exp(_log_prob_b_prev, _log_prob_nb_prev); + output.push_back(this); + } + for (auto child : _children) { + child.second->iterate_to_vec(output); + } +} + +//------------------------------------------------------- +// Effectively removes node +//------------------------------------------------------- +void PathTrie::remove() { + _exists = false; + + if (_children.size() == 0) { + auto child = _parent->_children.begin(); + for (child = _parent->_children.begin(); + child != _parent->_children.end(); ++child) { + if (child->first == _character) { + _parent->_children.erase(child); + 
break; + } + } + + if ( _parent->_children.size() == 0 && !_parent->_exists ) { + _parent->remove(); + } + + delete this; + } +} + +void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) { + _dictionary = dictionary; + _dictionary_state = dictionary->Start(); + _has_dictionary = true; +} + +using FSTMATCH = fst::SortedMatcher; +void PathTrie::set_matcher(std::shared_ptr matcher) { + _matcher = matcher; +} diff --git a/deploy/path_trie.h b/deploy/path_trie.h new file mode 100644 index 000000000..7b378e3f9 --- /dev/null +++ b/deploy/path_trie.h @@ -0,0 +1,59 @@ +#ifndef PATH_TRIE_H +#define PATH_TRIE_H +#pragma once +#include +#include +#include +#include +#include +#include + +using FSTMATCH = fst::SortedMatcher; + +class PathTrie { +public: + PathTrie(); + ~PathTrie(); + + PathTrie* get_path_trie(int new_char, bool reset = true); + + PathTrie* get_path_vec(std::vector &output); + + PathTrie* get_path_vec(std::vector& output, + int stop, + size_t max_steps = std::numeric_limits::max()); + + void iterate_to_vec(std::vector &output); + + void set_dictionary(fst::StdVectorFst* dictionary); + + void set_matcher(std::shared_ptr matcher); + + bool is_empty() { + return _ROOT == _character; + } + + void remove(); + + float _log_prob_b_prev; + float _log_prob_nb_prev; + float _log_prob_b_cur; + float _log_prob_nb_cur; + float _score; + float _approx_ctc; + + + int _ROOT; + int _character; + bool _exists; + + PathTrie *_parent; + std::vector > _children; + + fst::StdVectorFst* _dictionary; + fst::StdVectorFst::StateId _dictionary_state; + bool _has_dictionary; + std::shared_ptr _matcher; +}; + +#endif // PATH_TRIE_H diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index a1be7e0f6..4dc8b253f 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -175,3 +175,42 @@ double Scorer::get_score(std::string sentence, bool log) { } return final_score; } + +//-------------------------------------------------- +// Turn indices back into strings of chars +//-------------------------------------------------- +std::vector Scorer::make_ngram(PathTrie* prefix) { + /* + std::vector ngram; + PathTrie* current_node = prefix; + PathTrie* new_node = nullptr; + + for (int order = 0; order < _max_order; order++) { + std::vector prefix_vec; + + if (_is_character_based) { + new_node = current_node->get_path_vec(prefix_vec, ' ', 1); + current_node = new_node; + } else { + new_node = current_node->getPathVec(prefix_vec, ' '); + current_node = new_node->_parent; // Skipping spaces + } + + // reconstruct word + std::string word = vec2str(prefix_vec); + ngram.push_back(word); + + if (new_node->_character == -1) { + // No more spaces, but still need order + for (int i = 0; i < max_order - order - 1; i++) { + ngram.push_back(""); + } + break; + } + } + std::reverse(ngram.begin(), ngram.end()); + */ + std::vector ngram; + ngram.push_back("this"); + return ngram; +} //---------------- End makeNgrams ------------------ diff --git a/deploy/scorer.h b/deploy/scorer.h index a52420046..f0efbca99 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -4,10 +4,12 @@ #include #include #include +#include #include "lm/enumerate_vocab.hh" #include "lm/word_index.hh" #include "lm/virtual_interface.hh" #include "util/string_piece.hh" +#include "path_trie.h" const double OOV_SCOER = -1000.0; const std::string START_TOKEN = ""; @@ -49,18 +51,29 @@ public: void reset_params(float alpha, float beta); // get the final score double get_score(std::string, bool log=false); + // make ngram + std::vector make_ngram(PathTrie* prefix); // expose to 
decoder double alpha; double beta; + // fst dictionary + void* dictionary; protected: void load_LM(const char* filename); double get_log_prob(const std::vector& words); private: + void _init_char_list(); + void _init_char_map(); + void* _language_model; bool _is_character_based; size_t _max_order; + + std::vector _char_list; + std::unordered_map _char_map; + std::vector _vocabulary; }; From 8ff6221d00e8cc8bd5082a86d3d7f383c05b1430 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 29 Aug 2017 12:27:30 +0800 Subject: [PATCH 154/335] enable finite-state transducer in beam search decoding --- deploy.py | 8 +-- deploy/ctc_decoders.cpp | 15 +++- deploy/decoder_utils.cpp | 30 +++++++- deploy/decoder_utils.h | 4 +- deploy/scorer.cpp | 143 ++++++++++++++++++++++++++++++++++++--- deploy/scorer.h | 11 ++- 6 files changed, 189 insertions(+), 22 deletions(-) diff --git a/deploy.py b/deploy.py index 833c5c20c..d43ab1e0f 100644 --- a/deploy.py +++ b/deploy.py @@ -18,7 +18,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=5, + default=4, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -89,7 +89,8 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="lm/data/common_crawl_00.prune01111.trie.klm", + default="/home/work/liuyibing/lm_bak/common_crawl_00.prune01111.trie.klm", + #default="ptb_all.arpa", type=str, help="Path for language model. (default: %(default)s)") parser.add_argument( @@ -183,8 +184,7 @@ def infer(): vocabulary=data_generator.vocab_list, blank_id=len(data_generator.vocab_list), cutoff_prob=args.cutoff_prob, - # ext_scoring_func=ext_scorer, - ) + ext_scoring_func=ext_scorer, ) batch_beam_results += [beam_result] else: batch_beam_results = ctc_beam_search_decoder_batch( diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 30e855258..d84f5b16b 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -103,10 +103,13 @@ std::vector > prefixes.push_back(&root); if ( ext_scorer != nullptr && !ext_scorer->is_character_based()) { - if (ext_scorer->dictionary == nullptr) { + if (ext_scorer->_dictionary == nullptr) { // TODO: init dictionary + ext_scorer->set_char_map(vocabulary); + // add_space should be true? 
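
For orientation at this point: `fill_dictionary(true)` builds a character-level dictionary FST over the vocabulary, and `get_path_trie` later refuses to extend any prefix whose next character would leave that dictionary, so with `add_space=true` each word also gets a trailing space arc and the beam only keeps prefixes that can still be completed into vocabulary words. The snippet below is an illustrative Python sketch of that constraint using a plain nested dict instead of OpenFst; `build_trie` and `step` are invented helper names, not part of these sources.

```python
# Illustrative sketch only: a nested dict stands in for the OpenFst dictionary
# built by fill_dictionary(); build_trie and step are invented helper names.
def build_trie(words, add_space=True):
    root = {}
    for word in words:
        node = root
        for ch in word + (" " if add_space else ""):
            node = node.setdefault(ch, {})
    return root

def step(node, ch):
    # Child node if the prefix extended by `ch` can still reach a word,
    # otherwise None -- in which case the beam search skips that extension.
    return node.get(ch)

trie = build_trie(["cat", "car", "dog"])
assert step(trie, "c") is not None          # "c" may still become "cat"/"car"
assert step(step(trie, "c"), "x") is None   # "cx" cannot, so it is pruned
```
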
+ ext_scorer->fill_dictionary(true); } - auto fst_dict = static_cast(ext_scorer->dictionary); + auto fst_dict = static_cast(ext_scorer->_dictionary); fst::StdVectorFst* dict_ptr = fst_dict->Copy(true); root.set_dictionary(dict_ptr); auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); @@ -288,6 +291,14 @@ std::vector>> ThreadPool pool(num_processes); // number of samples int batch_size = probs_split.size(); + // dictionary init + if ( ext_scorer != nullptr) { + if (ext_scorer->_dictionary == nullptr) { + // TODO: init dictionary + ext_scorer->set_char_map(vocabulary); + ext_scorer->fill_dictionary(true); + } + } // enqueue the tasks of decoding std::vector>>> res; for (int i = 0; i < batch_size; i++) { diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp index 366c8d355..0ec86d6bc 100644 --- a/deploy/decoder_utils.cpp +++ b/deploy/decoder_utils.cpp @@ -11,6 +11,32 @@ size_t get_utf8_str_len(const std::string& str) { return str_len; } +//------------------------------------------------------ +//Splits string into vector of strings representing +//UTF-8 characters (not same as chars) +//------------------------------------------------------ +std::vector UTF8_split(const std::string& str) +{ + std::vector result; + std::string out_str; + + for (char c : str) + { + if ((c & 0xc0) != 0x80) //new UTF-8 character + { + if (!out_str.empty()) + { + result.push_back(out_str); + out_str.clear(); + } + } + + out_str.append(1, c); + } + result.push_back(out_str); + return result; +} + //------------------------------------------------------- // Overriding less than operator for sorting //------------------------------------------------------- @@ -49,12 +75,11 @@ void add_word_to_fst(const std::vector& word, // --------------------------------------------------------- // Adds a word to the dictionary FST based on char_map // --------------------------------------------------------- -bool addWordToDictionary(const std::string& word, +bool add_word_to_dictionary(const std::string& word, const std::unordered_map& char_map, bool add_space, int SPACE, fst::StdVectorFst* dictionary) { - /* auto characters = UTF8_split(word); std::vector int_word; @@ -77,6 +102,5 @@ bool addWordToDictionary(const std::string& word, } add_word_to_fst(int_word, dictionary); - */ return true; } // -------------- End of addWordToDictionary ------------ diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h index d5e7d1860..b61cdfbfe 100644 --- a/deploy/decoder_utils.h +++ b/deploy/decoder_utils.h @@ -35,10 +35,12 @@ bool prefix_compare(const PathTrie* x, const PathTrie* y); // See: http://stackoverflow.com/a/4063229 size_t get_utf8_str_len(const std::string& str); +std::vector UTF8_split(const std::string &str); + void add_word_to_fst(const std::vector& word, fst::StdVectorFst* dictionary); -bool addWordToDictionary(const std::string& word, +bool add_word_to_dictionary(const std::string& word, const std::unordered_map& char_map, bool add_space, int SPACE, diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 4dc8b253f..ad33a0cda 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -15,7 +15,9 @@ Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { this->beta = beta; _is_character_based = true; _language_model = nullptr; + _dictionary = nullptr; _max_order = 0; + _SPACE = -1; // load language model load_LM(lm_path.c_str()); } @@ -23,6 +25,8 @@ Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { Scorer::~Scorer() { if (_language_model != nullptr) delete 
static_cast(_language_model); + if (_dictionary != nullptr) + delete static_cast(_dictionary); } void Scorer::load_LM(const char* filename) { @@ -176,11 +180,83 @@ double Scorer::get_score(std::string sentence, bool log) { return final_score; } -//-------------------------------------------------- -// Turn indices back into strings of chars -//-------------------------------------------------- +std::string Scorer::vec2str(const std::vector& input) { + std::string word; + for (auto ind : input) { + word += _char_list[ind]; + } + return word; +} + + +std::vector +Scorer::split_labels(const std::vector &labels) { + if (labels.empty()) + return {}; + + std::string s = vec2str(labels); + std::vector words; + if (_is_character_based) { + words = UTF8_split(s); + } else { + words = split_str(s, " "); + } + return words; +} + +// Split a string into a list of strings on a given string +// delimiter. NB: delimiters on beginning / end of string are +// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. +std::vector Scorer::split_str(const std::string &s, + const std::string &delim) { + std::vector result; + std::size_t start = 0, delim_len = delim.size(); + while (true) { + std::size_t end = s.find(delim, start); + if (end == std::string::npos) { + if (start < s.size()) { + result.push_back(s.substr(start)); + } + break; + } + if (end > start) { + result.push_back(s.substr(start, end - start)); + } + start = end + delim_len; + } + return result; +} + +//--------------------------------------------------- +// Add index to char list for searching language model +//--------------------------------------------------- +void Scorer::set_char_map(std::vector char_list) { + _char_list = char_list; + std::string _SPACE_STR = " "; + + for (unsigned int i = 0; i < _char_list.size(); i++) { + // if (_char_list[i] == _BLANK_STR) { + // _BLANK = i; + // } else + if (_char_list[i] == _SPACE_STR) { + _SPACE = i; + } + } + + _char_map.clear(); + for(unsigned int i = 0; i < _char_list.size(); i++) + { + if(i == (unsigned int)_SPACE){ + _char_map[' '] = i; + } + else if(_char_list[i].size() == 1){ + _char_map[_char_list[i][0]] = i; + } + } + +} //------------- End of set_char_map ---------------- + std::vector Scorer::make_ngram(PathTrie* prefix) { - /* std::vector ngram; PathTrie* current_node = prefix; PathTrie* new_node = nullptr; @@ -189,10 +265,10 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { std::vector prefix_vec; if (_is_character_based) { - new_node = current_node->get_path_vec(prefix_vec, ' ', 1); + new_node = current_node->get_path_vec(prefix_vec, _SPACE, 1); current_node = new_node; } else { - new_node = current_node->getPathVec(prefix_vec, ' '); + new_node = current_node->get_path_vec(prefix_vec, _SPACE); current_node = new_node->_parent; // Skipping spaces } @@ -202,15 +278,60 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { if (new_node->_character == -1) { // No more spaces, but still need order - for (int i = 0; i < max_order - order - 1; i++) { + for (int i = 0; i < _max_order - order - 1; i++) { ngram.push_back(""); } break; } } std::reverse(ngram.begin(), ngram.end()); - */ - std::vector ngram; - ngram.push_back("this"); return ngram; -} //---------------- End makeNgrams ------------------ +} + +//--------------------------------------------------------- +// Helper function to populate Trie with a vocab using the +// char_list for maping from string to int +//--------------------------------------------------------- +void Scorer::fill_dictionary(bool add_space) { + + 
fst::StdVectorFst dictionary; + // First reverse char_list so ints can be accessed by chars + std::unordered_map char_map; + for (unsigned int i = 0; i < _char_list.size(); i++) { + char_map[_char_list[i]] = i; + } + + // For each unigram convert to ints and put in trie + int vocab_size = 0; + for (const auto& word : _vocabulary) { + bool added = add_word_to_dictionary(word, + char_map, + add_space, + _SPACE, + &dictionary); + vocab_size += added ? 1 : 0; + } + + std::cerr << "Vocab Size " << vocab_size << std::endl; + + // Simplify FST + + // This gets rid of "epsilon" transitions in the FST. + // These are transitions that don't require a string input to be taken. + // Getting rid of them is necessary to make the FST determinisitc, but + // can greatly increase the size of the FST + fst::RmEpsilon(&dictionary); + fst::StdVectorFst* new_dict = new fst::StdVectorFst; + + // This makes the FST deterministic, meaning for any string input there's + // only one possible state the FST could be in. It is assumed our + // dictionary is deterministic when using it. + // (lest we'd have to check for multiple transitions at each state) + fst::Determinize(dictionary, new_dict); + + // Finds the simplest equivalent fst. This is unnecessary but decreases + // memory usage of the dictionary + fst::Minimize(new_dict); + _dictionary = new_dict; + +} diff --git a/deploy/scorer.h b/deploy/scorer.h index f0efbca99..9ba55dd6d 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -53,15 +53,23 @@ public: double get_score(std::string, bool log=false); // make ngram std::vector make_ngram(PathTrie* prefix); + // fill dictionary for fst + void fill_dictionary(bool add_space); + // set char map + void set_char_map(std::vector char_list); // expose to decoder double alpha; double beta; // fst dictionary - void* dictionary; + void* _dictionary; protected: void load_LM(const char* filename); double get_log_prob(const std::vector& words); + std::string vec2str(const std::vector &input); + std::vector split_labels(const std::vector &labels); + std::vector split_str(const std::string &s, + const std::string &delim); private: void _init_char_list(); @@ -71,6 +79,7 @@ private: bool _is_character_based; size_t _max_order; + unsigned int _SPACE; std::vector _char_list; std::unordered_map _char_map; From 9a79b41bcdd2262590fd3d14daf91731430e42e1 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 29 Aug 2017 18:54:15 +0800 Subject: [PATCH 155/335] streamline source code --- deploy/ctc_decoders.cpp | 67 +++++++++++++++++----------------------- deploy/decoder_utils.cpp | 27 ++++++++++++++-- deploy/decoder_utils.h | 19 ++++++++---- deploy/path_trie.cpp | 27 +++++++--------- deploy/scorer.cpp | 65 +++++++------------------------------- deploy/scorer.h | 9 ++---- 6 files changed, 92 insertions(+), 122 deletions(-) diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index d84f5b16b..da37708af 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -10,8 +10,6 @@ #include "path_trie.h" #include "ThreadPool.h" -typedef float log_prob_type; - std::string ctc_best_path_decoder(std::vector > probs_seq, std::vector vocabulary) { @@ -19,8 +17,8 @@ std::string ctc_best_path_decoder(std::vector > probs_seq, int num_time_steps = probs_seq.size(); for (int i=0; i > probs_seq, std::vector max_idx_vec; double max_prob = 0.0; int max_idx = 0; - for (int i=0; i > probs_seq, } std::vector idx_vec; - for (int i=0; i0) && max_idx_vec[i]!=max_idx_vec[i-1])) { + for (int i = 0; i < max_idx_vec.size(); i++) { + if ((i == 0) || 
((i > 0) && max_idx_vec[i] != max_idx_vec[i-1])) { idx_vec.push_back(max_idx_vec[i]); } } std::string best_path_result; - for (int i=0; i > { // dimension check int num_time_steps = probs_seq.size(); - for (int i=0; i > std::vector::iterator it = std::find(vocabulary.begin(), vocabulary.end(), " "); int space_id = it - vocabulary.begin(); + // if no space in vocabulary if(space_id >= vocabulary.size()) { - std::cout << " The character space is not in the vocabulary!"<::max(); - static log_prob_type NEG_INF = -POS_INF; - static log_prob_type NUM_MIN = std::numeric_limits::min(); - // init PathTrie root; - root._log_prob_b_prev = 0.0; - root._score = 0.0; + root._score = root._log_prob_b_prev = 0.0; std::vector prefixes; prefixes.push_back(&root); @@ -140,17 +133,17 @@ std::vector > prob_idx.begin() + cutoff_len); } - std::vector > log_prob_idx; - for (int i=0; i - (prob_idx[i].first, log(prob_idx[i].second + NUM_MIN))); + std::vector > log_prob_idx; + for (int i = 0; i < cutoff_len; i++) { + log_prob_idx.push_back(std::pair + (prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); } // loop over chars for (int index = 0; index < log_prob_idx.size(); index++) { auto c = log_prob_idx[index].first; - log_prob_type log_prob_c = log_prob_idx[index].second; - //log_prob_type log_probs_prev; + float log_prob_c = log_prob_idx[index].second; + //float log_probs_prev; for (int i = 0; i < prefixes.size() && i > if (c == prefix->_character) { prefix->_log_prob_nb_cur = log_sum_exp( prefix->_log_prob_nb_cur, - log_prob_c + prefix->_log_prob_nb_prev - ); + log_prob_c + prefix->_log_prob_nb_prev); } // get new prefix auto prefix_new = prefix->get_path_trie(c); if (prefix_new != nullptr) { - float log_p = NEG_INF; + float log_p = -NUM_FLT_INF; if (c == prefix->_character - && prefix->_log_prob_b_prev > NEG_INF) { + && prefix->_log_prob_b_prev > -NUM_FLT_INF) { log_p = log_prob_c + prefix->_log_prob_b_prev; } else if (c != prefix->_character) { log_p = log_prob_c + prefix->_score; @@ -201,7 +193,6 @@ std::vector > log_p += score; log_p += ext_scorer->beta; - } prefix_new->_log_prob_nb_cur = log_sum_exp( prefix_new->_log_prob_nb_cur, log_p); @@ -273,7 +264,7 @@ std::vector > } -std::vector>> +std::vector > > ctc_beam_search_decoder_batch( std::vector>> probs_split, int beam_size, @@ -292,12 +283,12 @@ std::vector>> // number of samples int batch_size = probs_split.size(); // dictionary init - if ( ext_scorer != nullptr) { - if (ext_scorer->_dictionary == nullptr) { - // TODO: init dictionary - ext_scorer->set_char_map(vocabulary); - ext_scorer->fill_dictionary(true); - } + if ( ext_scorer != nullptr + && !ext_scorer->is_character_based() + && ext_scorer->_dictionary == nullptr) { + // init dictionary + ext_scorer->set_char_map(vocabulary); + ext_scorer->fill_dictionary(true); } // enqueue the tasks of decoding std::vector>>> res; @@ -308,7 +299,7 @@ std::vector>> ); } // get decoding results - std::vector>> batch_results; + std::vector > > batch_results; for (int i = 0; i < batch_size; i++) { batch_results.emplace_back(res[i].get()); } diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp index 0ec86d6bc..39beb811e 100644 --- a/deploy/decoder_utils.cpp +++ b/deploy/decoder_utils.cpp @@ -15,7 +15,7 @@ size_t get_utf8_str_len(const std::string& str) { //Splits string into vector of strings representing //UTF-8 characters (not same as chars) //------------------------------------------------------ -std::vector UTF8_split(const std::string& str) +std::vector split_utf8_str(const std::string& str) 
{ std::vector result; std::string out_str; @@ -37,6 +37,29 @@ std::vector UTF8_split(const std::string& str) return result; } +// Split a string into a list of strings on a given string +// delimiter. NB: delimiters on beginning / end of string are +// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. +std::vector split_str(const std::string &s, + const std::string &delim) { + std::vector result; + std::size_t start = 0, delim_len = delim.size(); + while (true) { + std::size_t end = s.find(delim, start); + if (end == std::string::npos) { + if (start < s.size()) { + result.push_back(s.substr(start)); + } + break; + } + if (end > start) { + result.push_back(s.substr(start, end - start)); + } + start = end + delim_len; + } + return result; +} + //------------------------------------------------------- // Overriding less than operator for sorting //------------------------------------------------------- @@ -80,7 +103,7 @@ bool add_word_to_dictionary(const std::string& word, bool add_space, int SPACE, fst::StdVectorFst* dictionary) { - auto characters = UTF8_split(word); + auto characters = split_utf8_str(word); std::vector int_word; diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h index b61cdfbfe..936605868 100644 --- a/deploy/decoder_utils.h +++ b/deploy/decoder_utils.h @@ -4,14 +4,19 @@ #include #include "path_trie.h" +const float NUM_FLT_INF = std::numeric_limits::max(); +const float NUM_FLT_MIN = std::numeric_limits::min(); + template -bool pair_comp_first_rev(const std::pair &a, const std::pair &b) +bool pair_comp_first_rev(const std::pair &a, + const std::pair &b) { return a.first > b.first; } template -bool pair_comp_second_rev(const std::pair &a, const std::pair &b) +bool pair_comp_second_rev(const std::pair &a, + const std::pair &b) { return a.second > b.second; } @@ -26,16 +31,18 @@ T log_sum_exp(const T &x, const T &y) return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; } -//------------------------------------------------------- -// Overriding less than operator for sorting -//------------------------------------------------------- + +// Functor for prefix comparsion bool prefix_compare(const PathTrie* x, const PathTrie* y); // Get length of utf8 encoding string // See: http://stackoverflow.com/a/4063229 size_t get_utf8_str_len(const std::string& str); -std::vector UTF8_split(const std::string &str); +std::vector split_str(const std::string &s, + const std::string &delim); + +std::vector split_utf8_str(const std::string &str); void add_word_to_fst(const std::vector& word, fst::StdVectorFst* dictionary); diff --git a/deploy/path_trie.cpp b/deploy/path_trie.cpp index 6cf7ae515..b841831d7 100644 --- a/deploy/path_trie.cpp +++ b/deploy/path_trie.cpp @@ -8,12 +8,11 @@ #include "decoder_utils.h" PathTrie::PathTrie() { - float lowest = -1.0*std::numeric_limits::max(); - _log_prob_b_prev = lowest; - _log_prob_nb_prev = lowest; - _log_prob_b_cur = lowest; - _log_prob_nb_cur = lowest; - _score = lowest; + _log_prob_b_prev = -NUM_FLT_INF; + _log_prob_nb_prev = -NUM_FLT_INF; + _log_prob_b_cur = -NUM_FLT_INF; + _log_prob_nb_cur = -NUM_FLT_INF; + _score = -NUM_FLT_INF; _ROOT = -1; _character = _ROOT; @@ -41,11 +40,10 @@ PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { if ( child != _children.end() ) { if (!child->second->_exists) { child->second->_exists = true; - float lowest = -1.0*std::numeric_limits::max(); - child->second->_log_prob_b_prev = lowest; - child->second->_log_prob_nb_prev = lowest; - child->second->_log_prob_b_cur = lowest; - 
child->second->_log_prob_nb_cur = lowest; + child->second->_log_prob_b_prev = -NUM_FLT_INF; + child->second->_log_prob_nb_prev = -NUM_FLT_INF; + child->second->_log_prob_b_cur = -NUM_FLT_INF; + child->second->_log_prob_nb_cur = -NUM_FLT_INF; } return (child->second); } else { @@ -106,8 +104,8 @@ void PathTrie::iterate_to_vec( _log_prob_b_prev = _log_prob_b_cur; _log_prob_nb_prev = _log_prob_nb_cur; - _log_prob_b_cur = -1.0 * std::numeric_limits::max(); - _log_prob_nb_cur = -1.0 * std::numeric_limits::max(); + _log_prob_b_cur = -NUM_FLT_INF; + _log_prob_nb_cur = -NUM_FLT_INF; _score = log_sum_exp(_log_prob_b_prev, _log_prob_nb_prev); output.push_back(this); @@ -117,9 +115,6 @@ void PathTrie::iterate_to_vec( } } -//------------------------------------------------------- -// Effectively removes node -//------------------------------------------------------- void PathTrie::remove() { _exists = false; diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index ad33a0cda..41f3894ab 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -17,7 +17,7 @@ Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { _language_model = nullptr; _dictionary = nullptr; _max_order = 0; - _SPACE = -1; + _SPACE_ID = -1; // load language model load_LM(lm_path.c_str()); } @@ -61,7 +61,7 @@ double Scorer::get_log_cond_prob(const std::vector& words) { lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]); // encounter OOV if (word_index == 0) { - return OOV_SCOER; + return OOV_SCORE; } cond_prob = model->BaseScore(&state, word_index, &out_state); tmp_state = state; @@ -197,64 +197,27 @@ Scorer::split_labels(const std::vector &labels) { std::string s = vec2str(labels); std::vector words; if (_is_character_based) { - words = UTF8_split(s); + words = split_utf8_str(s); } else { words = split_str(s, " "); } return words; } -// Split a string into a list of strings on a given string -// delimiter. NB: delimiters on beginning / end of string are -// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. 
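
On the character-based path above, `split_labels` turns the decoded ids back into a string and `split_utf8_str` then cuts that string into UTF-8 characters by treating every byte whose top two bits are not `10` as the start of a new character. A rough Python rendering of that byte rule, for illustration only (the helper name is invented, and it assumes Python 3 semantics where iterating bytes yields ints):

```python
# Rough sketch of the split_utf8_str() rule: a byte b with (b & 0xC0) == 0x80
# is a UTF-8 continuation byte, so any other byte starts a new character.
def split_utf8_chars(data):
    chars, current = [], b""
    for b in data:
        if (b & 0xC0) != 0x80 and current:   # a new character begins here
            chars.append(current)
            current = b""
        current += bytes([b])
    if current:
        chars.append(current)
    return chars

print(split_utf8_chars("abc".encode("utf-8")))         # [b'a', b'b', b'c']
print(len(split_utf8_chars(u"中文".encode("utf-8"))))   # 2 characters, 6 raw bytes
```
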
-std::vector Scorer::split_str(const std::string &s, - const std::string &delim) { - std::vector result; - std::size_t start = 0, delim_len = delim.size(); - while (true) { - std::size_t end = s.find(delim, start); - if (end == std::string::npos) { - if (start < s.size()) { - result.push_back(s.substr(start)); - } - break; - } - if (end > start) { - result.push_back(s.substr(start, end - start)); - } - start = end + delim_len; - } - return result; -} - -//--------------------------------------------------- -// Add index to char list for searching language model -//--------------------------------------------------- void Scorer::set_char_map(std::vector char_list) { _char_list = char_list; - std::string _SPACE_STR = " "; - - for (unsigned int i = 0; i < _char_list.size(); i++) { - // if (_char_list[i] == _BLANK_STR) { - // _BLANK = i; - // } else - if (_char_list[i] == _SPACE_STR) { - _SPACE = i; - } - } - _char_map.clear(); + for(unsigned int i = 0; i < _char_list.size(); i++) { - if(i == (unsigned int)_SPACE){ + if (_char_list[i] == " ") { + _SPACE_ID = i; _char_map[' '] = i; - } - else if(_char_list[i].size() == 1){ + } else if(_char_list[i].size() == 1){ _char_map[_char_list[i][0]] = i; } } - -} //------------- End of set_char_map ---------------- +} std::vector Scorer::make_ngram(PathTrie* prefix) { std::vector ngram; @@ -265,10 +228,10 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { std::vector prefix_vec; if (_is_character_based) { - new_node = current_node->get_path_vec(prefix_vec, _SPACE, 1); + new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID, 1); current_node = new_node; } else { - new_node = current_node->get_path_vec(prefix_vec, _SPACE); + new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID); current_node = new_node->_parent; // Skipping spaces } @@ -279,7 +242,7 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { if (new_node->_character == -1) { // No more spaces, but still need order for (int i = 0; i < _max_order - order - 1; i++) { - ngram.push_back(""); + ngram.push_back(START_TOKEN); } break; } @@ -288,10 +251,6 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { return ngram; } -//--------------------------------------------------------- -// Helper function to populate Trie with a vocab using the -// char_list for maping from string to int -//--------------------------------------------------------- void Scorer::fill_dictionary(bool add_space) { fst::StdVectorFst dictionary; @@ -307,7 +266,7 @@ void Scorer::fill_dictionary(bool add_space) { bool added = add_word_to_dictionary(word, char_map, add_space, - _SPACE, + _SPACE_ID, &dictionary); vocab_size += added ? 
1 : 0; } diff --git a/deploy/scorer.h b/deploy/scorer.h index 9ba55dd6d..17a5f1aa6 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -11,7 +11,7 @@ #include "util/string_piece.hh" #include "path_trie.h" -const double OOV_SCOER = -1000.0; +const double OOV_SCORE = -1000.0; const std::string START_TOKEN = ""; const std::string UNK_TOKEN = ""; const std::string END_TOKEN = ""; @@ -68,18 +68,13 @@ protected: double get_log_prob(const std::vector& words); std::string vec2str(const std::vector &input); std::vector split_labels(const std::vector &labels); - std::vector split_str(const std::string &s, - const std::string &delim); private: - void _init_char_list(); - void _init_char_map(); - void* _language_model; bool _is_character_based; size_t _max_order; - unsigned int _SPACE; + int _SPACE_ID; std::vector _char_list; std::unordered_map _char_map; From a661941ae79f09a871ac27e735726ec3156d6a10 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 29 Aug 2017 19:22:52 +0800 Subject: [PATCH 156/335] remove unused functions in Scorer --- deploy/ctc_decoders.cpp | 6 +-- deploy/scorer.cpp | 85 ++--------------------------------------- deploy/scorer.h | 9 +---- 3 files changed, 8 insertions(+), 92 deletions(-) diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index da37708af..9304c780b 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -96,13 +96,13 @@ std::vector > prefixes.push_back(&root); if ( ext_scorer != nullptr && !ext_scorer->is_character_based()) { - if (ext_scorer->_dictionary == nullptr) { + if (ext_scorer->dictionary == nullptr) { // TODO: init dictionary ext_scorer->set_char_map(vocabulary); // add_space should be true? ext_scorer->fill_dictionary(true); } - auto fst_dict = static_cast(ext_scorer->_dictionary); + auto fst_dict = static_cast(ext_scorer->dictionary); fst::StdVectorFst* dict_ptr = fst_dict->Copy(true); root.set_dictionary(dict_ptr); auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); @@ -285,7 +285,7 @@ std::vector > > // dictionary init if ( ext_scorer != nullptr && !ext_scorer->is_character_based() - && ext_scorer->_dictionary == nullptr) { + && ext_scorer->dictionary == nullptr) { // init dictionary ext_scorer->set_char_map(vocabulary); ext_scorer->fill_dictionary(true); diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index 41f3894ab..ced71995b 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -15,7 +15,7 @@ Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { this->beta = beta; _is_character_based = true; _language_model = nullptr; - _dictionary = nullptr; + dictionary = nullptr; _max_order = 0; _SPACE_ID = -1; // load language model @@ -25,8 +25,8 @@ Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { Scorer::~Scorer() { if (_language_model != nullptr) delete static_cast(_language_model); - if (_dictionary != nullptr) - delete static_cast(_dictionary); + if (dictionary != nullptr) + delete static_cast(dictionary); } void Scorer::load_LM(const char* filename) { @@ -99,87 +99,11 @@ double Scorer::get_log_prob(const std::vector& words) { return score; } -/* Strip a input sentence - * Parameters: - * str: A reference to the objective string - * ch: The character to prune - * Return: - * void - */ -inline void strip(std::string &str, char ch=' ') { - if (str.size() == 0) return; - int start = 0; - int end = str.size()-1; - for (int i=0; i=0; i--) { - if (str[i] == ch) { - end --; - } else { - break; - } - } - - if (start == 0 && end == str.size()-1) return; - if (start > end) { - 
std::string emp_str; - str = emp_str; - } else { - str = str.substr(start, end-start+1); - } -} - -int Scorer::word_count(std::string sentence) { - strip(sentence); - int cnt = 1; - for (int i=0; i_language_model; - State state, out_state; - lm::FullScoreReturn ret; - model->BeginSentenceWrite(&state); - - for (util::TokenIter it(sentence, ' '); it; ++it){ - lm::WordIndex wid = model->BaseVocabulary().Index(*it); - ret = model->BaseFullScore(&state, wid, &out_state); - state = out_state; - } - //log10 prob - double log_prob = ret.prob; - return log_prob; -} - void Scorer::reset_params(float alpha, float beta) { this->alpha = alpha; this->beta = beta; } -double Scorer::get_score(std::string sentence, bool log) { - double lm_score = get_log_cond_prob(sentence); - int word_cnt = word_count(sentence); - - double final_score = 0.0; - if (log == false) { - final_score = pow(10, alpha * lm_score) * pow(word_cnt, beta); - } else { - final_score = alpha * lm_score * std::log(10) - + beta * std::log(word_cnt); - } - return final_score; -} - std::string Scorer::vec2str(const std::vector& input) { std::string word; for (auto ind : input) { @@ -188,7 +112,6 @@ std::string Scorer::vec2str(const std::vector& input) { return word; } - std::vector Scorer::split_labels(const std::vector &labels) { if (labels.empty()) @@ -291,6 +214,6 @@ void Scorer::fill_dictionary(bool add_space) { // Finds the simplest equivalent fst. This is unnecessary but decreases // memory usage of the dictionary fst::Minimize(new_dict); - _dictionary = new_dict; + this->dictionary = new_dict; } diff --git a/deploy/scorer.h b/deploy/scorer.h index 17a5f1aa6..e5bfecaf8 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -42,15 +42,8 @@ public: double get_sent_log_prob(const std::vector& words); size_t get_max_order() { return _max_order; } bool is_character_based() { return _is_character_based; } - std::vector get_vocab() { return _vocabulary; } - // word insertion term - int word_count(std::string); - // get the log cond prob of the last word - double get_log_cond_prob(std::string); // reset params alpha & beta void reset_params(float alpha, float beta); - // get the final score - double get_score(std::string, bool log=false); // make ngram std::vector make_ngram(PathTrie* prefix); // fill dictionary for fst @@ -61,7 +54,7 @@ public: double alpha; double beta; // fst dictionary - void* _dictionary; + void* dictionary; protected: void load_LM(const char* filename); From a0c89ae7e030b935dd605f031f1128fa6a09473c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 30 Aug 2017 13:01:44 +0800 Subject: [PATCH 157/335] add min cutoff & top n cutoff --- deploy.py | 14 +++++-- deploy/ctc_decoders.cpp | 71 +++++++++++++++++++++------------ deploy/ctc_decoders.h | 2 + deploy/scorer.h | 2 +- deploy/swig_decoders_wrapper.py | 22 +++++++--- 5 files changed, 75 insertions(+), 36 deletions(-) diff --git a/deploy.py b/deploy.py index d43ab1e0f..60bdcb0c5 100644 --- a/deploy.py +++ b/deploy.py @@ -18,7 +18,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=4, + default=10, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -95,12 +95,12 @@ parser.add_argument( help="Path for language model. (default: %(default)s)") parser.add_argument( "--alpha", - default=0.26, + default=1.5, type=float, help="Parameter associated with language model. 
(default: %(default)f)") parser.add_argument( "--beta", - default=0.1, + default=0.3, type=float, help="Parameter associated with word count. (default: %(default)f)") parser.add_argument( @@ -109,6 +109,12 @@ parser.add_argument( type=float, help="The cutoff probability of pruning" "in beam search. (default: %(default)f)") +parser.add_argument( + "--cutoff_top_n", + default=40, + type=int, + help="The cutoff number of pruning" + "in beam search. (default: %(default)f)") args = parser.parse_args() @@ -184,6 +190,7 @@ def infer(): vocabulary=data_generator.vocab_list, blank_id=len(data_generator.vocab_list), cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, ext_scoring_func=ext_scorer, ) batch_beam_results += [beam_result] else: @@ -194,6 +201,7 @@ def infer(): blank_id=len(data_generator.vocab_list), num_processes=args.num_processes_beam_search, cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, ext_scoring_func=ext_scorer, ) for i, beam_result in enumerate(batch_beam_results): diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 9304c780b..7933b01d0 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -62,6 +62,7 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob, + int cutoff_top_n, Scorer *ext_scorer) { // dimension check @@ -116,19 +117,33 @@ std::vector > prob_idx.push_back(std::pair(i, prob[i])); } + float min_cutoff = -NUM_FLT_INF; + bool full_beam = false; + if (ext_scorer != nullptr) { + int num_prefixes = std::min((int)prefixes.size(), beam_size); + std::sort(prefixes.begin(), prefixes.begin() + num_prefixes, + prefix_compare); + min_cutoff = prefixes[num_prefixes-1]->_score + log(prob[blank_id]) + - std::max(0.0, ext_scorer->beta); + full_beam = (num_prefixes == beam_size); + } + // pruning of vacobulary int cutoff_len = prob.size(); - if (cutoff_prob < 1.0) { + if (cutoff_prob < 1.0 || cutoff_top_n < prob.size()) { std::sort(prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); - double cum_prob = 0.0; - cutoff_len = 0; - for (int i=0; i= cutoff_prob) break; + if (cutoff_prob < 1.0) { + double cum_prob = 0.0; + cutoff_len = 0; + for (int i=0; i= cutoff_prob) break; + } } + cutoff_len = std::min(cutoff_len, cutoff_top_n); prob_idx = std::vector >( prob_idx.begin(), prob_idx.begin() + cutoff_len); } @@ -138,15 +153,17 @@ std::vector > log_prob_idx.push_back(std::pair (prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); } - // loop over chars for (int index = 0; index < log_prob_idx.size(); index++) { auto c = log_prob_idx[index].first; float log_prob_c = log_prob_idx[index].second; - //float log_probs_prev; for (int i = 0; i < prefixes.size() && i_score < min_cutoff) { + break; + } // blank if (c == blank_id) { prefix->_log_prob_b_cur = log_sum_exp( @@ -178,7 +195,7 @@ std::vector > (c == space_id || ext_scorer->is_character_based()) ) { PathTrie *prefix_to_score = nullptr; - // don't score the space + // skip scoring the space if (ext_scorer->is_character_based()) { prefix_to_score = prefix_new; } else { @@ -202,10 +219,10 @@ std::vector > } // end of loop over chars prefixes.clear(); - // update log probabilities + // update log probs root.iterate_to_vec(prefixes); - // sort prefixes by score + // preserve top beam_size prefixes if (prefixes.size() >= beam_size) { std::nth_element(prefixes.begin(), prefixes.begin() + beam_size, @@ -218,18 +235,20 @@ std::vector > } } + // compute aproximate ctc score as the return score for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { 
double approx_ctc = prefixes[i]->_score; - // remove word insert: - std::vector output; - prefixes[i]->get_path_vec(output); - size_t prefix_length = output.size(); - // remove language model weight: if (ext_scorer != nullptr) { - // auto words = split_labels(output); - // approx_ctc = approx_ctc - path_length * ext_scorer->beta; - // approx_ctc -= (_lm->get_sent_log_prob(words)) * ext_scorer->alpha; + std::vector output; + prefixes[i]->get_path_vec(output); + size_t prefix_length = output.size(); + auto words = ext_scorer->split_labels(output); + // remove word insert + approx_ctc = approx_ctc - prefix_length * ext_scorer->beta; + // remove language model weight: + approx_ctc -= (ext_scorer->get_sent_log_prob(words)) + * ext_scorer->alpha; } prefixes[i]->_approx_ctc = approx_ctc; @@ -253,11 +272,9 @@ std::vector > for (int j = 0; j < output.size(); j++) { output_str += vocabulary[output[j]]; } - std::pair output_pair(space_prefixes[i]->_score, - output_str); - output_vecs.emplace_back( - output_pair - ); + std::pair + output_pair(-space_prefixes[i]->_approx_ctc, output_str); + output_vecs.emplace_back(output_pair); } return output_vecs; @@ -272,6 +289,7 @@ std::vector > > int blank_id, int num_processes, double cutoff_prob, + int cutoff_top_n, Scorer *ext_scorer ) { if (num_processes <= 0) { @@ -295,7 +313,8 @@ std::vector > > for (int i = 0; i < batch_size; i++) { res.emplace_back( pool.enqueue(ctc_beam_search_decoder, probs_split[i], - beam_size, vocabulary, blank_id, cutoff_prob, ext_scorer) + beam_size, vocabulary, blank_id, cutoff_prob, + cutoff_top_n, ext_scorer) ); } // get decoding results diff --git a/deploy/ctc_decoders.h b/deploy/ctc_decoders.h index 238903820..f339cbd07 100644 --- a/deploy/ctc_decoders.h +++ b/deploy/ctc_decoders.h @@ -39,6 +39,7 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob=1.0, + int cutoff_top_n=40, Scorer *ext_scorer=NULL ); @@ -66,6 +67,7 @@ std::vector>> int blank_id, int num_processes, double cutoff_prob=1.0, + int cutoff_top_n=40, Scorer *ext_scorer=NULL ); diff --git a/deploy/scorer.h b/deploy/scorer.h index e5bfecaf8..7d7ce430b 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -50,6 +50,7 @@ public: void fill_dictionary(bool add_space); // set char map void set_char_map(std::vector char_list); + std::vector split_labels(const std::vector &labels); // expose to decoder double alpha; double beta; @@ -60,7 +61,6 @@ protected: void load_LM(const char* filename); double get_log_prob(const std::vector& words); std::string vec2str(const std::vector &input); - std::vector split_labels(const std::vector &labels); private: void* _language_model; diff --git a/deploy/swig_decoders_wrapper.py b/deploy/swig_decoders_wrapper.py index 51f3173b2..b44fae0ae 100644 --- a/deploy/swig_decoders_wrapper.py +++ b/deploy/swig_decoders_wrapper.py @@ -43,6 +43,7 @@ def ctc_beam_search_decoder(probs_seq, vocabulary, blank_id, cutoff_prob=1.0, + cutoff_top_n=40, ext_scoring_func=None): """Wrapper for the CTC Beam Search Decoder. @@ -59,6 +60,10 @@ def ctc_beam_search_decoder(probs_seq, :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int :param ext_scoring_func: External scoring function for partially decoded sentence, e.g. word count or language model. 
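
Stripped of the external scorer, the dictionary trie, and the cutoff pruning that the code above adds, the recursion these decoders perform over blank and non-blank probabilities can be written quite compactly. The following Python sketch is a reference illustration only (toy inputs, invented names), not the shipped implementation:

```python
# Hedged reference sketch of the CTC prefix beam search recursion, without the
# language model, dictionary FST, or cutoff pruning used by the C++ decoder.
import math
from collections import defaultdict

NEG_INF = float("-inf")

def log_sum_exp(a, b):
    if a == NEG_INF:
        return b
    if b == NEG_INF:
        return a
    m = max(a, b)
    return m + math.log(math.exp(a - m) + math.exp(b - m))

def prefix_beam_search(probs_seq, vocabulary, blank_id, beam_size=20):
    # every prefix (a tuple of char ids) keeps (log_prob_blank, log_prob_non_blank)
    beams = {(): (0.0, NEG_INF)}
    for probs in probs_seq:
        next_beams = defaultdict(lambda: (NEG_INF, NEG_INF))
        for prefix, (lp_b, lp_nb) in beams.items():
            lp_total = log_sum_exp(lp_b, lp_nb)
            for c, p in enumerate(probs):
                log_p = math.log(p + 1e-300)
                if c == blank_id:                     # stay on the same prefix
                    b, nb = next_beams[prefix]
                    next_beams[prefix] = (log_sum_exp(b, log_p + lp_total), nb)
                elif prefix and c == prefix[-1]:      # repeated character
                    b, nb = next_beams[prefix + (c,)]  # extend only via a blank
                    next_beams[prefix + (c,)] = (b, log_sum_exp(nb, log_p + lp_b))
                    b, nb = next_beams[prefix]         # or collapse onto itself
                    next_beams[prefix] = (b, log_sum_exp(nb, log_p + lp_nb))
                else:                                  # a genuinely new character
                    b, nb = next_beams[prefix + (c,)]
                    next_beams[prefix + (c,)] = (b, log_sum_exp(nb, log_p + lp_total))
        # keep only the best beam_size prefixes, like the nth_element step above
        ranked = sorted(next_beams.items(),
                        key=lambda kv: log_sum_exp(*kv[1]), reverse=True)
        beams = dict(ranked[:beam_size])
    best = max(beams.items(), key=lambda kv: log_sum_exp(*kv[1]))[0]
    return "".join(vocabulary[i] for i in best)

vocab = ["a", "b", " "]
probs = [[0.1, 0.6, 0.1, 0.2], [0.2, 0.1, 0.1, 0.6], [0.6, 0.1, 0.1, 0.2]]
print(prefix_beam_search(probs, vocab, blank_id=len(vocab)))   # "ba"
```
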
@@ -67,9 +72,9 @@ def ctc_beam_search_decoder(probs_seq, results, in descending order of the probability. :rtype: list """ - return swig_decoders.ctc_beam_search_decoder(probs_seq.tolist(), beam_size, - vocabulary, blank_id, - cutoff_prob, ext_scoring_func) + return swig_decoders.ctc_beam_search_decoder( + probs_seq.tolist(), beam_size, vocabulary, blank_id, cutoff_prob, + cutoff_top_n, ext_scoring_func) def ctc_beam_search_decoder_batch(probs_split, @@ -78,6 +83,7 @@ def ctc_beam_search_decoder_batch(probs_split, blank_id, num_processes, cutoff_prob=1.0, + cutoff_top_n=40, ext_scoring_func=None): """Wrapper for the batched CTC beam search decoder. @@ -92,11 +98,15 @@ def ctc_beam_search_decoder_batch(probs_split, :type blank_id: int :param num_processes: Number of parallel processes. :type num_processes: int - :param cutoff_prob: Cutoff probability in pruning, + :param cutoff_prob: Cutoff probability in vocabulary pruning, default 1.0, no pruning. + :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int :param num_processes: Number of parallel processes. :type num_processes: int - :type cutoff_prob: float :param ext_scoring_func: External scoring function for partially decoded sentence, e.g. word count or language model. @@ -109,4 +119,4 @@ def ctc_beam_search_decoder_batch(probs_split, return swig_decoders.ctc_beam_search_decoder_batch( probs_split, beam_size, vocabulary, blank_id, num_processes, - cutoff_prob, ext_scoring_func) + cutoff_prob, cutoff_top_n, ext_scoring_func) From a2ddfe8d9ed05223d495bba94e110b73ac0b6019 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 30 Aug 2017 18:29:21 +0800 Subject: [PATCH 158/335] clean up code & update README for decoder in deployment --- deploy.py | 43 +++++++++++++++++++----------- deploy/README.md | 13 ++++++--- deploy/ctc_decoders.cpp | 57 ++++++++++++++++++++++++---------------- deploy/ctc_decoders.h | 6 +++-- deploy/decoder_utils.cpp | 28 +++++--------------- deploy/decoder_utils.h | 11 ++++++-- deploy/path_trie.cpp | 2 +- deploy/scorer.h | 16 ++++++++++- 8 files changed, 106 insertions(+), 70 deletions(-) diff --git a/deploy.py b/deploy.py index 60bdcb0c5..11972f5f7 100644 --- a/deploy.py +++ b/deploy.py @@ -9,7 +9,7 @@ import distutils.util import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator -from model import deep_speech2 +from layer import deep_speech2 from deploy.swig_decoders_wrapper import * from error_rate import wer import utils @@ -79,7 +79,7 @@ parser.add_argument( "(default: %(default)s)") parser.add_argument( "--beam_size", - default=20, + default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") parser.add_argument( @@ -89,8 +89,7 @@ parser.add_argument( help="Number of output per sample in beam search. (default: %(default)d)") parser.add_argument( "--language_model_path", - default="/home/work/liuyibing/lm_bak/common_crawl_00.prune01111.trie.klm", - #default="ptb_all.arpa", + default="lm/data/common_crawl_00.prune01111.trie.klm", type=str, help="Path for language model. 
(default: %(default)s)") parser.add_argument( @@ -136,14 +135,13 @@ def infer(): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) - output_probs = deep_speech2( + output_probs, _ = deep_speech2( audio_data=audio_data, text_data=text_data, dict_size=data_generator.vocab_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - is_inference=True) + rnn_size=args.rnn_layer_size) # load parameters parameters = paddle.parameters.Parameters.from_tar( @@ -159,8 +157,10 @@ def infer(): infer_data = batch_reader().next() # run inference - infer_results = paddle.infer( - output_layer=output_probs, parameters=parameters, input=infer_data) + inferer = paddle.inference.Inference( + output_layer=output_probs, parameters=parameters) + infer_results = inferer.infer(input=infer_data) + num_steps = len(infer_results) // len(infer_data) probs_split = [ infer_results[i * num_steps:(i + 1) * num_steps] @@ -178,17 +178,29 @@ def infer(): ext_scorer = Scorer( alpha=args.alpha, beta=args.beta, model_path=args.language_model_path) + # from unicode to string + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + + # The below two steps, i.e. setting char map and filling dictionary of + # FST will be completed implicitly when ext_scorer first used.But to save + # the time of decoding the first audio sample, they are done in advance. + ext_scorer.set_char_map(vocab_list) + # only for ward based language model + ext_scorer.fill_dictionary(True) + + # for word error rate metric + wer_sum, wer_counter = 0.0, 0 + ## decode and print time_begin = time.time() - wer_sum, wer_counter = 0, 0 batch_beam_results = [] if args.decode_method == 'beam_search': for i, probs in enumerate(probs_split): beam_result = ctc_beam_search_decoder( probs_seq=probs, beam_size=args.beam_size, - vocabulary=data_generator.vocab_list, - blank_id=len(data_generator.vocab_list), + vocabulary=vocab_list, + blank_id=len(vocab_list), cutoff_prob=args.cutoff_prob, cutoff_top_n=args.cutoff_top_n, ext_scoring_func=ext_scorer, ) @@ -197,8 +209,8 @@ def infer(): batch_beam_results = ctc_beam_search_decoder_batch( probs_split=probs_split, beam_size=args.beam_size, - vocabulary=data_generator.vocab_list, - blank_id=len(data_generator.vocab_list), + vocabulary=vocab_list, + blank_id=len(vocab_list), num_processes=args.num_processes_beam_search, cutoff_prob=args.cutoff_prob, cutoff_top_n=args.cutoff_top_n, @@ -213,8 +225,7 @@ def infer(): print("cur wer = %f , average wer = %f" % (wer_cur, wer_sum / wer_counter)) - time_end = time.time() - print("total time = %f" % (time_end - time_begin)) + print("time for decoding = %f" % (time.time() - time_begin)) def main(): diff --git a/deploy/README.md b/deploy/README.md index 9f2be76e8..e817be105 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -1,5 +1,9 @@ + +The decoders for deployment developed in C++ are a better alternative for the prototype decoders in Pytthon, with more powerful performance in both speed and accuracy. 
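
Before the setup steps below, this is roughly what calling the decoders from Python looks like. It mirrors the arguments used in `deploy.py`; the probabilities are toy data, the language model path is only the default from `deploy.py`, and it assumes `Scorer` is exported by `deploy/swig_decoders_wrapper.py`, as the star-import in `deploy.py` suggests.

```python
# Hedged usage sketch with toy probabilities; see deploy.py for the real pipeline.
import numpy as np
from deploy.swig_decoders_wrapper import Scorer, ctc_beam_search_decoder

vocab_list = [ch.encode("utf-8") for ch in [u"a", u"b", u"c", u" "]]
probs_seq = np.random.rand(50, len(vocab_list) + 1)   # +1 column for the blank
probs_seq /= probs_seq.sum(axis=1, keepdims=True)     # each row sums to one

ext_scorer = Scorer(alpha=1.5, beta=0.3,
                    model_path="lm/data/common_crawl_00.prune01111.trie.klm")

beam_results = ctc_beam_search_decoder(
    probs_seq=probs_seq,
    beam_size=500,
    vocabulary=vocab_list,
    blank_id=len(vocab_list),
    cutoff_prob=1.0,     # keep the whole distribution ...
    cutoff_top_n=40,     # ... but at most 40 candidate characters per step
    ext_scoring_func=ext_scorer)

print(beam_results[0])   # (score, transcription), best first
```
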
+ ### Installation -The build of the decoder for deployment depends on several open-sourced projects, first clone or download them to current directory (i.e., `deep_speech_2/deploy`) + +The build depends on several open-sourced projects, first clone or download them to current directory (i.e., `deep_speech_2/deploy`) - [**KenLM**](https://github.com/kpu/kenlm/): Faster and Smaller Language Model Queries @@ -14,7 +18,6 @@ wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz tar -xzvf openfst-1.6.3.tar.gz ``` -- [**SWIG**](http://www.swig.org): Compiling for python interface requires swig, please make sure swig being installed. - [**ThreadPool**](http://progsch.net/wordpress/): A library for C++ thread pool @@ -22,6 +25,8 @@ tar -xzvf openfst-1.6.3.tar.gz git clone https://github.com/progschj/ThreadPool.git ``` +- [**SWIG**](http://www.swig.org): A tool that provides the Python interface for the decoders, please make sure it being installed. + Then run the setup ```shell @@ -29,7 +34,9 @@ python setup.py install --num_processes 4 cd .. ``` -### Deployment +### Usage + +The decoders for deployment share almost the same interface with the prototye decoders in Python. After the installation succeeds, these decoders are very convenient for call in Python, and a complete example in ```deploy.py``` can be refered. For GPU deployment diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 7933b01d0..4e94edfbb 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -90,26 +90,32 @@ std::vector > space_id = -2; } - // init + // init prefixes' root PathTrie root; root._score = root._log_prob_b_prev = 0.0; std::vector prefixes; prefixes.push_back(&root); - if ( ext_scorer != nullptr && !ext_scorer->is_character_based()) { - if (ext_scorer->dictionary == nullptr) { - // TODO: init dictionary + if ( ext_scorer != nullptr) { + if (ext_scorer->is_char_map_empty()) { ext_scorer->set_char_map(vocabulary); - // add_space should be true? 
- ext_scorer->fill_dictionary(true); } - auto fst_dict = static_cast(ext_scorer->dictionary); - fst::StdVectorFst* dict_ptr = fst_dict->Copy(true); - root.set_dictionary(dict_ptr); - auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); - root.set_matcher(matcher); + if (!ext_scorer->is_character_based()) { + if (ext_scorer->dictionary == nullptr) { + // fill dictionary for fst + ext_scorer->fill_dictionary(true); + } + auto fst_dict = static_cast + (ext_scorer->dictionary); + fst::StdVectorFst* dict_ptr = fst_dict->Copy(true); + root.set_dictionary(dict_ptr); + auto matcher = std::make_shared + (*dict_ptr, fst::MATCH_INPUT); + root.set_matcher(matcher); + } } + // prefix search over time for (int time_step = 0; time_step < num_time_steps; time_step++) { std::vector prob = probs_seq[time_step]; std::vector > prob_idx; @@ -147,12 +153,12 @@ std::vector > prob_idx = std::vector >( prob_idx.begin(), prob_idx.begin() + cutoff_len); } - std::vector > log_prob_idx; for (int i = 0; i < cutoff_len; i++) { log_prob_idx.push_back(std::pair (prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); } + // loop over chars for (int index = 0; index < log_prob_idx.size(); index++) { auto c = log_prob_idx[index].first; @@ -214,15 +220,14 @@ std::vector > prefix_new->_log_prob_nb_cur = log_sum_exp( prefix_new->_log_prob_nb_cur, log_p); } - } - + } // end of loop over prefix } // end of loop over chars prefixes.clear(); // update log probs root.iterate_to_vec(prefixes); - // preserve top beam_size prefixes + // only preserve top beam_size prefixes if (prefixes.size() >= beam_size) { std::nth_element(prefixes.begin(), prefixes.begin() + beam_size, @@ -233,7 +238,7 @@ std::vector > prefixes[i]->remove(); } } - } + } // end of loop over time // compute aproximate ctc score as the return score for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { @@ -300,14 +305,19 @@ std::vector > > ThreadPool pool(num_processes); // number of samples int batch_size = probs_split.size(); - // dictionary init - if ( ext_scorer != nullptr - && !ext_scorer->is_character_based() - && ext_scorer->dictionary == nullptr) { - // init dictionary - ext_scorer->set_char_map(vocabulary); - ext_scorer->fill_dictionary(true); + + // scorer filling up + if ( ext_scorer != nullptr) { + if (ext_scorer->is_char_map_empty()) { + ext_scorer->set_char_map(vocabulary); + } + if(!ext_scorer->is_character_based() + && ext_scorer->dictionary == nullptr) { + // init dictionary + ext_scorer->fill_dictionary(true); + } } + // enqueue the tasks of decoding std::vector>>> res; for (int i = 0; i < batch_size; i++) { @@ -317,6 +327,7 @@ std::vector > > cutoff_top_n, ext_scorer) ); } + // get decoding results std::vector > > batch_results; for (int i = 0; i < batch_size; i++) { diff --git a/deploy/ctc_decoders.h b/deploy/ctc_decoders.h index f339cbd07..58d2b7895 100644 --- a/deploy/ctc_decoders.h +++ b/deploy/ctc_decoders.h @@ -27,7 +27,8 @@ std::string ctc_best_path_decoder(std::vector > probs_seq, * beam_size: The width of beam search. * vocabulary: A vector of vocabulary. * blank_id: ID of blank. - * cutoff_prob: Cutoff probability of pruning + * cutoff_prob: Cutoff probability for pruning. + * cutoff_top_n: Cutoff number for pruning. * ext_scorer: External scorer to evaluate a prefix. * Return: * A vector that each element is a pair of score and decoding result, @@ -54,7 +55,8 @@ std::vector > * vocabulary: A vector of vocabulary. * blank_id: ID of blank. * num_processes: Number of threads for beam search. 
- * cutoff_prob: Cutoff probability of pruning + * cutoff_prob: Cutoff probability for pruning. + * cutoff_top_n: Cutoff number for pruning. * ext_scorer: External scorer to evaluate a prefix. * Return: * A 2-D vector that each element is a vector of decoding result for one diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp index 39beb811e..37674f71e 100644 --- a/deploy/decoder_utils.cpp +++ b/deploy/decoder_utils.cpp @@ -11,10 +11,6 @@ size_t get_utf8_str_len(const std::string& str) { return str_len; } -//------------------------------------------------------ -//Splits string into vector of strings representing -//UTF-8 characters (not same as chars) -//------------------------------------------------------ std::vector split_utf8_str(const std::string& str) { std::vector result; @@ -37,9 +33,6 @@ std::vector split_utf8_str(const std::string& str) return result; } -// Split a string into a list of strings on a given string -// delimiter. NB: delimiters on beginning / end of string are -// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. std::vector split_str(const std::string &s, const std::string &delim) { std::vector result; @@ -60,9 +53,6 @@ std::vector split_str(const std::string &s, return result; } -//------------------------------------------------------- -// Overriding less than operator for sorting -//------------------------------------------------------- bool prefix_compare(const PathTrie* x, const PathTrie* y) { if (x->_score == y->_score) { if (x->_character == y->_character) { @@ -73,11 +63,8 @@ bool prefix_compare(const PathTrie* x, const PathTrie* y) { } else { return x->_score > y->_score; } -} //---------- End path_compare --------------------------- +} -// -------------------------------------------------------------- -// Adds word to fst without copying entire dictionary -// -------------------------------------------------------------- void add_word_to_fst(const std::vector& word, fst::StdVectorFst* dictionary) { if (dictionary->NumStates() == 0) { @@ -93,15 +80,12 @@ void add_word_to_fst(const std::vector& word, src = dst; } dictionary->SetFinal(dst, fst::StdArc::Weight::One()); -} // ------------ End of add_word_to_fst ----------------------- +} -// --------------------------------------------------------- -// Adds a word to the dictionary FST based on char_map -// --------------------------------------------------------- bool add_word_to_dictionary(const std::string& word, const std::unordered_map& char_map, bool add_space, - int SPACE, + int SPACE_ID, fst::StdVectorFst* dictionary) { auto characters = split_utf8_str(word); @@ -109,7 +93,7 @@ bool add_word_to_dictionary(const std::string& word, for (auto& c : characters) { if (c == " ") { - int_word.push_back(SPACE); + int_word.push_back(SPACE_ID); } else { auto int_c = char_map.find(c); if (int_c != char_map.end()) { @@ -121,9 +105,9 @@ bool add_word_to_dictionary(const std::string& word, } if (add_space) { - int_word.push_back(SPACE); + int_word.push_back(SPACE_ID); } add_word_to_fst(int_word, dictionary); return true; -} // -------------- End of addWordToDictionary ------------ +} diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h index 936605868..829ea76d0 100644 --- a/deploy/decoder_utils.h +++ b/deploy/decoder_utils.h @@ -7,6 +7,7 @@ const float NUM_FLT_INF = std::numeric_limits::max(); const float NUM_FLT_MIN = std::numeric_limits::min(); +// Function template for comparing two pairs template bool pair_comp_first_rev(const std::pair &a, const std::pair &b) @@ -31,7 +32,6 @@ T 
log_sum_exp(const T &x, const T &y) return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; } - // Functor for prefix comparsion bool prefix_compare(const PathTrie* x, const PathTrie* y); @@ -39,17 +39,24 @@ bool prefix_compare(const PathTrie* x, const PathTrie* y); // See: http://stackoverflow.com/a/4063229 size_t get_utf8_str_len(const std::string& str); +// Split a string into a list of strings on a given string +// delimiter. NB: delimiters on beginning / end of string are +// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. std::vector split_str(const std::string &s, const std::string &delim); +// Splits string into vector of strings representing +// UTF-8 characters (not same as chars) std::vector split_utf8_str(const std::string &str); +// Add a word in index to the dicionary of fst void add_word_to_fst(const std::vector& word, fst::StdVectorFst* dictionary); +// Add a word in string to dictionary bool add_word_to_dictionary(const std::string& word, const std::unordered_map& char_map, bool add_space, - int SPACE, + int SPACE_ID, fst::StdVectorFst* dictionary); #endif // DECODER_UTILS_H diff --git a/deploy/path_trie.cpp b/deploy/path_trie.cpp index b841831d7..b22f2a471 100644 --- a/deploy/path_trie.cpp +++ b/deploy/path_trie.cpp @@ -86,7 +86,7 @@ PathTrie* PathTrie::get_path_vec(std::vector& output) { PathTrie* PathTrie::get_path_vec(std::vector& output, int stop, - size_t max_steps /*= std::numeric_limits::max() */) { + size_t max_steps) { if (_character == stop || _character == _ROOT || output.size() == max_steps) { diff --git a/deploy/scorer.h b/deploy/scorer.h index 7d7ce430b..e3d61a71c 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -32,34 +32,48 @@ public: // Example: // Scorer scorer(alpha, beta, "path_of_language_model"); // scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); -// scorer.get_log_cond_prob("this a sentence"); // scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); class Scorer{ public: Scorer(double alpha, double beta, const std::string& lm_path); ~Scorer(); + double get_log_cond_prob(const std::vector& words); + double get_sent_log_prob(const std::vector& words); + size_t get_max_order() { return _max_order; } + + bool is_char_map_empty() {return _char_map.size() == 0; } + bool is_character_based() { return _is_character_based; } + // reset params alpha & beta void reset_params(float alpha, float beta); + // make ngram std::vector make_ngram(PathTrie* prefix); + // fill dictionary for fst void fill_dictionary(bool add_space); + // set char map void set_char_map(std::vector char_list); + std::vector split_labels(const std::vector &labels); + // expose to decoder double alpha; double beta; + // fst dictionary void* dictionary; protected: void load_LM(const char* filename); + double get_log_prob(const std::vector& words); + std::string vec2str(const std::vector &input); private: From 1d163ad15f7bd37799c7015024cbebb110680b95 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 31 Aug 2017 12:22:27 +0800 Subject: [PATCH 159/335] Fixed a serious mistake of bidirectional simple rnn for DS2. 
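The previous `bidirectional_simple_rnn_bn_layer` built a single input projection and a single sequence-wise batch-norm layer and fed that one output to both the forward and the backward recurrent layers, so the two directions shared input-hidden weights and normalization statistics. The hunk below gives each direction its own projection and batch norm. Condensed to its essence, the fixed layer looks roughly like this sketch (argument names follow layer.py):

```python
import paddle.v2 as paddle

def bidirectional_simple_rnn_bn_layer(name, input, size, act):
    """Bidirectional simple RNN with per-direction input projection + BN."""
    def projected_bn():
        # One fc projection and one batch norm per direction, instead of a
        # single shared input_proj / input_proj_bn feeding both directions.
        proj = paddle.layer.fc(
            input=input,
            size=size,
            act=paddle.activation.Linear(),
            bias_attr=False)
        return paddle.layer.batch_norm(
            input=proj, act=paddle.activation.Linear())

    forward_simple_rnn = paddle.layer.recurrent(
        input=projected_bn(), act=act, reverse=False)
    backward_simple_rnn = paddle.layer.recurrent(
        input=projected_bn(), act=act, reverse=True)
    return paddle.layer.concat(
        input=[forward_simple_rnn, backward_simple_rnn])
```

A later patch in this series ([PATCH 163/335]) reintroduces the shared-projection form as an explicit `share_rnn_weights` option, since the Deep Speech 2 setup does share input-hidden weights across the two directions.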
--- cloud/pcloud_submit.sh | 4 ++-- layer.py | 16 ++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index a7fb42cbc..3c9a1c260 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,6 +1,6 @@ TRAIN_MANIFEST="cloud/cloud.manifest.train" DEV_MANIFEST="cloud/cloud.manifest.dev" -CLOUD_MODEL_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/model" +CLOUD_MODEL_DIR="./checkpoints" BATCH_SIZE=256 NUM_GPU=8 NUM_NODE=1 @@ -11,7 +11,7 @@ DS2_PATH=${PWD%/*} cp -f pcloud_train.sh ${DS2_PATH} paddlecloud submit \ --image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest \ +-image bootstrapper:5000/paddlepaddle/pcloud_ds2:latest \ -jobname ${JOB_NAME} \ -cpu ${NUM_GPU} \ -gpu ${NUM_GPU} \ diff --git a/layer.py b/layer.py index 3b492645d..ef25c0a1b 100644 --- a/layer.py +++ b/layer.py @@ -55,16 +55,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): :rtype: LayerOutput """ # input-hidden weights shared across bi-direcitonal rnn. - input_proj = paddle.layer.fc( + input_proj_forward = paddle.layer.fc( input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) - # batch norm is only performed on input-state projection - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, act=paddle.activation.Linear()) + input_proj_backward = paddle.layer.fc( + input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) + # batch norm is only performed on input-state projection + input_proj_bn_forward = paddle.layer.batch_norm( + input=input_proj_forward, act=paddle.activation.Linear()) + input_proj_bn_backward = paddle.layer.batch_norm( + input=input_proj_backward, act=paddle.activation.Linear()) # forward and backward in time forward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=False) + input=input_proj_bn_forward, act=act, reverse=False) backward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=True) + input=input_proj_bn_backward, act=act, reverse=True) return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) From aed0cc991f45bffa56f5947b84ab14784bc11f87 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sun, 3 Sep 2017 17:24:04 +0800 Subject: [PATCH 160/335] Fixed a bug of mixing forward and backward projection in bi-directional GRUs. --- layer.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/layer.py b/layer.py index c4055aaa3..8fec0eea3 100644 --- a/layer.py +++ b/layer.py @@ -84,19 +84,26 @@ def bidirectional_gru_bn_layer(name, input, size, act): :rtype: LayerOutput """ # input-hidden weights shared across bi-direcitonal rnn. 
- input_proj = paddle.layer.fc( + input_proj_forward = paddle.layer.fc( + input=input, + size=size * 3, + act=paddle.activation.Linear(), + bias_attr=False) + input_proj_backward = paddle.layer.fc( input=input, size=size * 3, act=paddle.activation.Linear(), bias_attr=False) # batch norm is only performed on input-state projection - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, act=paddle.activation.Linear()) + input_proj_bn_forward = paddle.layer.batch_norm( + input=input_proj_forward, act=paddle.activation.Linear()) + input_proj_bn_backward = paddle.layer.batch_norm( + input=input_proj_backward, act=paddle.activation.Linear()) # forward and backward in time forward_gru = paddle.layer.grumemory( - input=input_proj_bn, act=act, reverse=False) + input=input_proj_bn_forward, act=act, reverse=False) backward_gru = paddle.layer.grumemory( - input=input_proj_bn, act=act, reverse=True) + input=input_proj_bn_backward, act=act, reverse=True) return paddle.layer.concat(input=[forward_gru, backward_gru]) From 8f89a9bdd49a77b49c756700440a19bcd57a6667 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 4 Sep 2017 13:06:54 +0800 Subject: [PATCH 161/335] Print log to pfs for DS cloud training and set use_gru to False by default. --- cloud/pcloud_train.sh | 4 ++-- train.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index e42da1d62..75949574d 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -13,7 +13,7 @@ python ./cloud/split_data.py \ --in_manifest_path=${DEV_MANIFEST} \ --out_manifest_path='/local.manifest.dev' -python train.py \ +python -u train.py \ --batch_size=$BATCH_SIZE \ --use_gpu=1 \ --trainer_count=${NUM_GPU} \ @@ -21,4 +21,4 @@ python train.py \ --is_local=${IS_LOCAL} \ --train_manifest_path='/local.manifest.train' \ --dev_manifest_path='/local.manifest.dev' \ ---output_model_dir=${MODEL_PATH} \ +--output_model_dir=${MODEL_PATH} 2>&1 | tee ./log/train.log diff --git a/train.py b/train.py index 8e95d7bc8..1d0b92fff 100644 --- a/train.py +++ b/train.py @@ -37,12 +37,12 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=1280, + default=1024, type=int, help="RNN layer cell number. (default: %(default)s)") parser.add_argument( "--use_gru", - default=True, + default=False, type=bool, help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( From 177af059532946964ada888e526dcc33d74c275c Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 4 Sep 2017 15:01:40 +0800 Subject: [PATCH 162/335] Fix a bug in use_gru argument parsing. --- demo_server.py | 4 ++-- evaluate.py | 4 ++-- infer.py | 4 ++-- train.py | 2 +- tune.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/demo_server.py b/demo_server.py index 60d972393..e4093ab29 100644 --- a/demo_server.py +++ b/demo_server.py @@ -68,8 +68,8 @@ parser.add_argument( help="RNN layer cell number. (default: %(default)s)") parser.add_argument( "--use_gru", - default=True, - type=bool, + default=False, + type=distutils.util.strtobool, help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", diff --git a/evaluate.py b/evaluate.py index 2f87abbde..8ab5b9449 100644 --- a/evaluate.py +++ b/evaluate.py @@ -40,8 +40,8 @@ parser.add_argument( help="RNN layer cell number. 
(default: %(default)s)") parser.add_argument( "--use_gru", - default=True, - type=bool, + default=False, + type=distutils.util.strtobool, help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", diff --git a/infer.py b/infer.py index 91b08932c..6b77f3d72 100644 --- a/infer.py +++ b/infer.py @@ -35,8 +35,8 @@ parser.add_argument( help="RNN layer cell number. (default: %(default)s)") parser.add_argument( "--use_gru", - default=True, - type=bool, + default=False, + type=distutils.util.strtobool, help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", diff --git a/train.py b/train.py index 1d0b92fff..42870bf53 100644 --- a/train.py +++ b/train.py @@ -43,7 +43,7 @@ parser.add_argument( parser.add_argument( "--use_gru", default=False, - type=bool, + type=distutils.util.strtobool, help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--adam_learning_rate", diff --git a/tune.py b/tune.py index 8a9b5b610..ffab8860b 100644 --- a/tune.py +++ b/tune.py @@ -36,8 +36,8 @@ parser.add_argument( help="RNN layer cell number. (default: %(default)s)") parser.add_argument( "--use_gru", - default=True, - type=bool, + default=False, + type=distutils.util.strtobool, help="Use GRU or simple RNN. (default: %(default)s)") parser.add_argument( "--use_gpu", From 2aa4af1c29ac22208fb33371a53677fabbd6d9d0 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 4 Sep 2017 17:56:25 +0800 Subject: [PATCH 163/335] Revert back to support input-hidden weights sharing between bi-directional RNNs. 1. Add options to enable and disable RNN weights sharing. 2. Set rnn_layer_size to 2048 by default. 3. Revert back the striding steps of 1st conv layer from 2 to 3. 4. Revert back to BRelu. Above follows DS2 papers. --- demo_server.py | 12 +++++-- evaluate.py | 12 +++++-- infer.py | 12 +++++-- layer.py | 89 +++++++++++++++++++++++++++++++++++--------------- model.py | 14 +++++--- train.py | 12 +++++-- tune.py | 12 +++++-- utils.py | 6 ++-- 8 files changed, 126 insertions(+), 43 deletions(-) diff --git a/demo_server.py b/demo_server.py index e4093ab29..b000e35e9 100644 --- a/demo_server.py +++ b/demo_server.py @@ -63,9 +63,16 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=512, + default=2048, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--share_rnn_weights", + default=True, + type=distutils.util.strtobool, + help="Whether to share input-hidden weights between forword and backward " + "directional simple RNNs. Only available when use_gru=False. " + "(default: %(default)s)") parser.add_argument( "--use_gru", default=False, @@ -205,7 +212,8 @@ def start_server(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath) + pretrained_model_path=args.model_filepath, + share_rnn_weights=args.share_rnn_weights) # prepare ASR inference handler def file_to_transcript(filename): diff --git a/evaluate.py b/evaluate.py index 8ab5b9449..8dd169b6c 100644 --- a/evaluate.py +++ b/evaluate.py @@ -35,9 +35,16 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=512, + default=2048, type=int, help="RNN layer cell number. 
(default: %(default)s)") +parser.add_argument( + "--share_rnn_weights", + default=True, + type=distutils.util.strtobool, + help="Whether to share input-hidden weights between forword and backward " + "directional simple RNNs. Only available when use_gru=False. " + "(default: %(default)s)") parser.add_argument( "--use_gru", default=False, @@ -148,7 +155,8 @@ def evaluate(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath) + pretrained_model_path=args.model_filepath, + share_rnn_weights=args.share_rnn_weights) error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 diff --git a/infer.py b/infer.py index 6b77f3d72..0c52ffc83 100644 --- a/infer.py +++ b/infer.py @@ -30,9 +30,16 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=512, + default=2048, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--share_rnn_weights", + default=True, + type=distutils.util.strtobool, + help="Whether to share input-hidden weights between forword and backward " + "directional simple RNNs. Only available when use_gru=False. " + "(default: %(default)s)") parser.add_argument( "--use_gru", default=False, @@ -149,7 +156,8 @@ def infer(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath) + pretrained_model_path=args.model_filepath, + share_rnn_weights=args.share_rnn_weights) result_transcripts = ds2_model.infer_batch( infer_data=infer_data, decode_method=args.decode_method, diff --git a/layer.py b/layer.py index a91f694b8..b7ac3c23e 100644 --- a/layer.py +++ b/layer.py @@ -39,7 +39,7 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, return paddle.layer.batch_norm(input=conv_layer, act=act) -def bidirectional_simple_rnn_bn_layer(name, input, size, act): +def bidirectional_simple_rnn_bn_layer(name, input, size, act, share_weights): """Bidirectonal simple rnn layer with sequence-wise batch normalization. The batch normalization is only performed on input-state weights. @@ -51,24 +51,50 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): :type size: int :param act: Activation type. :type act: BaseActivation + :param share_weights: Whether to share input-hidden weights between + forward and backward directional RNNs. + :type share_weights: bool :return: Bidirectional simple rnn layer. :rtype: LayerOutput """ - # input-hidden weights shared across bi-direcitonal rnn. - input_proj_forward = paddle.layer.fc( - input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) - input_proj_backward = paddle.layer.fc( - input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) - # batch norm is only performed on input-state projection - input_proj_bn_forward = paddle.layer.batch_norm( - input=input_proj_forward, act=paddle.activation.Linear()) - input_proj_bn_backward = paddle.layer.batch_norm( - input=input_proj_backward, act=paddle.activation.Linear()) - # forward and backward in time - forward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn_forward, act=act, reverse=False) - backward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn_backward, act=act, reverse=True) + if share_weights: + # input-hidden weights shared between bi-direcitonal rnn. 
+ input_proj = paddle.layer.fc( + input=input, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + # batch norm is only performed on input-state projection + input_proj_bn = paddle.layer.batch_norm( + input=input_proj, act=paddle.activation.Linear()) + # forward and backward in time + forward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn, act=act, reverse=False) + backward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn, act=act, reverse=True) + + else: + input_proj_forward = paddle.layer.fc( + input=input, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + input_proj_backward = paddle.layer.fc( + input=input, + size=size, + act=paddle.activation.Linear(), + bias_attr=False) + # batch norm is only performed on input-state projection + input_proj_bn_forward = paddle.layer.batch_norm( + input=input_proj_forward, act=paddle.activation.Linear()) + input_proj_bn_backward = paddle.layer.batch_norm( + input=input_proj_backward, act=paddle.activation.Linear()) + # forward and backward in time + forward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn_forward, act=act, reverse=False) + backward_simple_rnn = paddle.layer.recurrent( + input=input_proj_bn_backward, act=act, reverse=True) + return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) @@ -87,7 +113,6 @@ def bidirectional_gru_bn_layer(name, input, size, act): :return: Bidirectional simple rnn layer. :rtype: LayerOutput """ - # input-hidden weights shared across bi-direcitonal rnn. input_proj_forward = paddle.layer.fc( input=input, size=size * 3, @@ -98,7 +123,7 @@ def bidirectional_gru_bn_layer(name, input, size, act): size=size * 3, act=paddle.activation.Linear(), bias_attr=False) - # batch norm is only performed on input-state projection + # batch norm is only performed on input-related projections input_proj_bn_forward = paddle.layer.batch_norm( input=input_proj_forward, act=paddle.activation.Linear()) input_proj_bn_backward = paddle.layer.batch_norm( @@ -126,9 +151,9 @@ def conv_group(input, num_stacks): filter_size=(11, 41), num_channels_in=1, num_channels_out=32, - stride=(2, 2), + stride=(3, 2), padding=(5, 20), - act=paddle.activation.Relu()) + act=paddle.activation.BRelu()) for i in xrange(num_stacks - 1): conv = conv_bn_layer( input=conv, @@ -137,13 +162,13 @@ def conv_group(input, num_stacks): num_channels_out=32, stride=(1, 2), padding=(5, 10), - act=paddle.activation.Relu()) + act=paddle.activation.BRelu()) output_num_channels = 32 output_height = 160 // pow(2, num_stacks) + 1 return conv, output_num_channels, output_height -def rnn_group(input, size, num_stacks, use_gru): +def rnn_group(input, size, num_stacks, use_gru, share_rnn_weights): """RNN group with stacked bidirectional simple RNN layers. :param input: Input layer. @@ -154,6 +179,10 @@ def rnn_group(input, size, num_stacks, use_gru): :type num_stacks: int :param use_gru: Use gru if set True. Use simple rnn if set False. :type use_gru: bool + :param share_rnn_weights: Whether to share input-hidden weights between + forward and backward directional RNNs. + It is only available when use_gru=False. + :type share_weights: bool :return: Output layer of the RNN group. :rtype: LayerOutput """ @@ -165,12 +194,14 @@ def rnn_group(input, size, num_stacks, use_gru): input=output, size=size, act=paddle.activation.Relu()) + # BRelu does not support hppl, need to add later. Use Relu instead. 
else: output = bidirectional_simple_rnn_bn_layer( name=str(i), input=output, size=size, - act=paddle.activation.Relu()) + act=paddle.activation.BRelu(), + share_weights=share_rnn_weights) return output @@ -180,9 +211,10 @@ def deep_speech2(audio_data, num_conv_layers=2, num_rnn_layers=3, rnn_size=256, - use_gru=True): + use_gru=False, + share_rnn_weights=True): """ - The whole DeepSpeech2 model structure (a simplified version). + The whole DeepSpeech2 model structure. :param audio_data: Audio spectrogram data layer. :type audio_data: LayerOutput @@ -198,6 +230,10 @@ def deep_speech2(audio_data, :type rnn_size: int :param use_gru: Use gru if set True. Use simple rnn if set False. :type use_gru: bool + :param share_rnn_weights: Whether to share input-hidden weights between + forward and backward direction RNNs. + It is only available when use_gru=False. + :type share_weights: bool :return: A tuple of an output unnormalized log probability layer ( before softmax) and a ctc cost layer. :rtype: tuple of LayerOutput @@ -218,7 +254,8 @@ def deep_speech2(audio_data, input=conv2seq, size=rnn_size, num_stacks=num_rnn_layers, - use_gru=use_gru) + use_gru=use_gru, + share_rnn_weights=share_rnn_weights) fc = paddle.layer.fc( input=rnn_group_output, size=dict_size + 1, diff --git a/model.py b/model.py index eec971c00..0234ed2d4 100644 --- a/model.py +++ b/model.py @@ -27,12 +27,17 @@ class DeepSpeech2Model(object): :param pretrained_model_path: Pretrained model path. If None, will train from stratch. :type pretrained_model_path: basestring|None + :param share_rnn_weights: Whether to share input-hidden weights between + forward and backward directional RNNs.Notice that + for GRU, weight sharing is not supported. + :type share_rnn_weights: bool """ def __init__(self, vocab_size, num_conv_layers, num_rnn_layers, - rnn_layer_size, use_gru, pretrained_model_path): + rnn_layer_size, use_gru, pretrained_model_path, + share_rnn_weights): self._create_network(vocab_size, num_conv_layers, num_rnn_layers, - rnn_layer_size, use_gru) + rnn_layer_size, use_gru, share_rnn_weights) self._create_parameters(pretrained_model_path) self._inferer = None self._loss_inferer = None @@ -226,7 +231,7 @@ class DeepSpeech2Model(object): gzip.open(model_path)) def _create_network(self, vocab_size, num_conv_layers, num_rnn_layers, - rnn_layer_size, use_gru): + rnn_layer_size, use_gru, share_rnn_weights): """Create data layers and model network.""" # paddle.data_type.dense_array is used for variable batch input. # The size 161 * 161 is only an placeholder value and the real shape @@ -244,4 +249,5 @@ class DeepSpeech2Model(object): num_conv_layers=num_conv_layers, num_rnn_layers=num_rnn_layers, rnn_size=rnn_layer_size, - use_gru=use_gru) + use_gru=use_gru, + share_rnn_weights=share_rnn_weights) diff --git a/train.py b/train.py index 42870bf53..d055341f1 100644 --- a/train.py +++ b/train.py @@ -37,9 +37,16 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=1024, + default=2048, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--share_rnn_weights", + default=True, + type=distutils.util.strtobool, + help="Whether to share input-hidden weights between forword and backward " + "directional simple RNNs. Only available when use_gru=False. 
" + "(default: %(default)s)") parser.add_argument( "--use_gru", default=False, @@ -176,7 +183,8 @@ def train(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.init_model_path) + pretrained_model_path=args.init_model_path, + share_rnn_weights=args.share_rnn_weights) ds2_model.train( train_batch_reader=train_batch_reader, dev_batch_reader=dev_batch_reader, diff --git a/tune.py b/tune.py index ffab8860b..d8001339e 100644 --- a/tune.py +++ b/tune.py @@ -31,9 +31,16 @@ parser.add_argument( help="RNN layer number. (default: %(default)s)") parser.add_argument( "--rnn_layer_size", - default=512, + default=2048, type=int, help="RNN layer cell number. (default: %(default)s)") +parser.add_argument( + "--share_rnn_weights", + default=True, + type=distutils.util.strtobool, + help="Whether to share input-hidden weights between forword and backward " + "directional simple RNNs. Only available when use_gru=False. " + "(default: %(default)s)") parser.add_argument( "--use_gru", default=False, @@ -164,7 +171,8 @@ def tune(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath) + pretrained_model_path=args.model_filepath, + share_rnn_weights=args.share_rnn_weights) # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) diff --git a/utils.py b/utils.py index 9ca363c8f..1d51e2042 100644 --- a/utils.py +++ b/utils.py @@ -10,12 +10,12 @@ def print_arguments(args): Usage: .. code-block:: python - + parser = argparse.ArgumentParser() parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() + args = parser.parse_args() print_arguments(args) - + :param args: Input argparse.Namespace for printing. :type args: argparse.Namespace """ From 805846ce67bd82c183c9ab5e6fb3872c31b241d6 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 4 Sep 2017 20:13:08 +0800 Subject: [PATCH 164/335] Reduce the config parsing codes for DS2 and make it looks cleaner. --- decoder.py | 15 ++-- demo_server.py | 187 ++++++++++++++++----------------------- evaluate.py | 206 ++++++++++++++++--------------------------- infer.py | 205 ++++++++++++++++-------------------------- model.py | 19 ++-- train.py | 235 ++++++++++++++++++------------------------------- tune.py | 214 +++++++++++++++++--------------------------- utils.py | 25 ------ 8 files changed, 415 insertions(+), 691 deletions(-) delete mode 100644 utils.py diff --git a/decoder.py b/decoder.py index 8f2e0508d..61ead25c8 100644 --- a/decoder.py +++ b/decoder.py @@ -9,8 +9,9 @@ from math import log import multiprocessing -def ctc_best_path_decoder(probs_seq, vocabulary): - """Best path decoder, also called argmax decoder or greedy decoder. +def ctc_greedy_decoder(probs_seq, vocabulary): + """CTC greedy (best path) decoder. + Path consisting of the most probable tokens are further post-processed to remove consecutive repetitions and all blanks. @@ -45,10 +46,12 @@ def ctc_beam_search_decoder(probs_seq, cutoff_prob=1.0, ext_scoring_func=None, nproc=False): - """Beam search decoder for CTC-trained network. It utilizes beam search - to approximately select top best decoding labels and returning results - in the descending order. The implementation is based on Prefix - Beam Search (https://arxiv.org/abs/1408.2873), and the unclear part is + """CTC Beam search decoder. 
+ + It utilizes beam search to approximately select top best decoding + labels and returning results in the descending order. + The implementation is based on Prefix Beam Search + (https://arxiv.org/abs/1408.2873), and the unclear part is redesigned. Two important modifications: 1) in the iterative computation of probabilities, the assignment operation is changed to accumulation for one prefix may comes from different paths; 2) the if condition "if l^+ not diff --git a/demo_server.py b/demo_server.py index b000e35e9..d2afa49b4 100644 --- a/demo_server.py +++ b/demo_server.py @@ -9,118 +9,74 @@ import SocketServer import struct import wave import paddle.v2 as paddle -from utils import print_arguments from data_utils.data import DataGenerator from model import DeepSpeech2Model from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--host_ip", - default="localhost", - type=str, - help="Server IP address. (default: %(default)s)") -parser.add_argument( - "--host_port", - default=8086, - type=int, - help="Server Port. (default: %(default)s)") -parser.add_argument( - "--speech_save_dir", - default="demo_cache", - type=str, - help="Directory for saving demo speech. (default: %(default)s)") -parser.add_argument( - "--vocab_filepath", - default='datasets/vocab/eng_vocab.txt', - type=str, - help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--mean_std_filepath", - default='mean_std.npz', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") -parser.add_argument( - "--warmup_manifest_path", - default='datasets/manifest.test', - type=str, - help="Manifest path for warmup test. (default: %(default)s)") -parser.add_argument( - "--specgram_type", - default='linear', - type=str, - help="Feature type of audio data: 'linear' (power spectrum)" - " or 'mfcc'. (default: %(default)s)") -parser.add_argument( - "--num_conv_layers", - default=2, - type=int, - help="Convolution layer number. (default: %(default)s)") -parser.add_argument( - "--num_rnn_layers", - default=3, - type=int, - help="RNN layer number. (default: %(default)s)") -parser.add_argument( - "--rnn_layer_size", - default=2048, - type=int, - help="RNN layer cell number. (default: %(default)s)") -parser.add_argument( - "--share_rnn_weights", - default=True, - type=distutils.util.strtobool, - help="Whether to share input-hidden weights between forword and backward " - "directional simple RNNs. Only available when use_gru=False. " - "(default: %(default)s)") -parser.add_argument( - "--use_gru", - default=False, - type=distutils.util.strtobool, - help="Use GRU or simple RNN. (default: %(default)s)") -parser.add_argument( - "--use_gpu", - default=True, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") -parser.add_argument( - "--model_filepath", - default='checkpoints/params.latest.tar.gz', - type=str, - help="Model filepath. (default: %(default)s)") -parser.add_argument( - "--decode_method", - default='beam_search', - type=str, - help="Method for ctc decoding: best_path or beam_search. " - "(default: %(default)s)") -parser.add_argument( - "--beam_size", - default=100, - type=int, - help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--language_model_path", - default="lm/data/common_crawl_00.prune01111.trie.klm", - type=str, - help="Path for language model. 
(default: %(default)s)") -parser.add_argument( - "--alpha", - default=0.36, - type=float, - help="Parameter associated with language model. (default: %(default)f)") -parser.add_argument( - "--beta", - default=0.25, - type=float, - help="Parameter associated with word count. (default: %(default)f)") -parser.add_argument( - "--cutoff_prob", - default=0.99, - type=float, - help="The cutoff probability of pruning" - "in beam search. (default: %(default)f)") + + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable +# configurations of overall +add_arg('host_port', int, 8086, "Server's IP port.") +add_arg('host_ip', str, + 'localhost', + "Server's IP address.") +add_arg('speech_save_dir', str, + 'demo_cache', + "Directory to save demo audios.") +add_arg('use_gpu', bool, True, "Use GPU or not.") +# configurations of decoder +add_arg('beam_size', int, 500, "Beam search width.") +add_arg('alpha', float, 0.36, "Coef of LM for beam search.") +add_arg('beta', float, 0.25, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") +add_arg('decoder_method', str, + 'ctc_beam_search', + "Decoder method. Options: ctc_beam_search, ctc_greedy", + choices = ['ctc_beam_search', 'ctc_greedy']) +# configurations of data preprocess +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) +# configurations of model structure +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. 
Not for GRU.") +# configurations of data io +add_arg('warmup_manifest', str, + 'datasets/manifest.test', + "Filepath of manifest to warm up.") +add_arg('mean_std_path', str, + 'mean_std.npz', + "Filepath of normalizer's mean & std.") +add_arg('vocab_path', str, + 'datasets/vocab/eng_vocab.txt', + "Filepath of vocabulary.") +# configurations of model io +add_arg('model_path', str, + './checkpoints/params.latest.tar.gz', + "If None, the training starts from scratch, " + "otherwise, it resumes from the pre-trained model.") args = parser.parse_args() +# yapf: disable class AsrTCPServer(SocketServer.TCPServer): @@ -200,8 +156,8 @@ def start_server(): """Start the ASR server""" # prepare data generator data_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, + vocab_filepath=args.vocab_path, + mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1) @@ -212,7 +168,7 @@ def start_server(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath, + pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) # prepare ASR inference handler @@ -220,13 +176,13 @@ def start_server(): feature = data_generator.process_utterance(filename, "") result_transcript = ds2_model.infer_batch( infer_data=[feature], - decode_method=args.decode_method, + decoder_method=args.decoder_method, beam_alpha=args.alpha, beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, - language_model_path=args.language_model_path, + language_model_path=args.lang_model_path, num_processes=1) return result_transcript[0] @@ -235,7 +191,7 @@ def start_server(): print('Warming up ...') warm_up_test( audio_process_handler=file_to_transcript, - manifest_path=args.warmup_manifest_path, + manifest_path=args.warmup_manifest, num_test_cases=3) print('-----------------------------------------------------------') @@ -249,6 +205,13 @@ def start_server(): server.serve_forever() +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=1) diff --git a/evaluate.py b/evaluate.py index 8dd169b6c..1adf42557 100644 --- a/evaluate.py +++ b/evaluate.py @@ -10,140 +10,83 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer, cer -import utils +NUM_CPU = multiprocessing.cpu_count() // 2 parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--batch_size", - default=128, - type=int, - help="Minibatch size for evaluation. (default: %(default)s)") -parser.add_argument( - "--trainer_count", - default=8, - type=int, - help="Trainer number. (default: %(default)s)") -parser.add_argument( - "--num_conv_layers", - default=2, - type=int, - help="Convolution layer number. (default: %(default)s)") -parser.add_argument( - "--num_rnn_layers", - default=3, - type=int, - help="RNN layer number. (default: %(default)s)") -parser.add_argument( - "--rnn_layer_size", - default=2048, - type=int, - help="RNN layer cell number. 
(default: %(default)s)") -parser.add_argument( - "--share_rnn_weights", - default=True, - type=distutils.util.strtobool, - help="Whether to share input-hidden weights between forword and backward " - "directional simple RNNs. Only available when use_gru=False. " - "(default: %(default)s)") -parser.add_argument( - "--use_gru", - default=False, - type=distutils.util.strtobool, - help="Use GRU or simple RNN. (default: %(default)s)") -parser.add_argument( - "--use_gpu", - default=True, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") -parser.add_argument( - "--num_threads_data", - default=multiprocessing.cpu_count() // 2, - type=int, - help="Number of cpu threads for preprocessing data. (default: %(default)s)") -parser.add_argument( - "--num_processes_beam_search", - default=multiprocessing.cpu_count() // 2, - type=int, - help="Number of cpu processes for beam search. (default: %(default)s)") -parser.add_argument( - "--mean_std_filepath", - default='mean_std.npz', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") -parser.add_argument( - "--decode_method", - default='beam_search', - type=str, - help="Method for ctc decoding, best_path or beam_search. " - "(default: %(default)s)") -parser.add_argument( - "--language_model_path", - default="lm/data/common_crawl_00.prune01111.trie.klm", - type=str, - help="Path for language model. (default: %(default)s)") -parser.add_argument( - "--alpha", - default=0.36, - type=float, - help="Parameter associated with language model. (default: %(default)f)") -parser.add_argument( - "--beta", - default=0.25, - type=float, - help="Parameter associated with word count. (default: %(default)f)") -parser.add_argument( - "--cutoff_prob", - default=0.99, - type=float, - help="The cutoff probability of pruning" - "in beam search. (default: %(default)f)") -parser.add_argument( - "--beam_size", - default=500, - type=int, - help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--specgram_type", - default='linear', - type=str, - help="Feature type of audio data: 'linear' (power spectrum)" - " or 'mfcc'. (default: %(default)s)") -parser.add_argument( - "--decode_manifest_path", - default='datasets/manifest.test', - type=str, - help="Manifest path for decoding. (default: %(default)s)") -parser.add_argument( - "--model_filepath", - default='checkpoints/params.latest.tar.gz', - type=str, - help="Model filepath. (default: %(default)s)") -parser.add_argument( - "--vocab_filepath", - default='datasets/vocab/eng_vocab.txt', - type=str, - help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--error_rate_type", - default='wer', - choices=['wer', 'cer'], - type=str, - help="Error rate type for evaluation. 'wer' for word error rate and 'cer' " - "for character error rate. 
" - "(default: %(default)s)") + + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable +# configurations of overall +add_arg('batch_size', int, 128, "Minibatch size.") +add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('error_rate_type', str, 'wer', "Error rate type for evaluation.", + choices=['wer', 'cer']) +# configurations of decoder +add_arg('beam_size', int, 500, "Beam search width.") +add_arg('alpha', float, 0.36, "Coef of LM for beam search.") +add_arg('beta', float, 0.25, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") +add_arg('decoder_method', str, + 'ctc_beam_search', + "Decoder method. Options: ctc_beam_search, ctc_greedy", + choices = ['ctc_beam_search', 'ctc_greedy']) +# configurations of data preprocess +add_arg('parallels_data', int, NUM_CPU,"# of CPUs for data preprocessing.") +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) +# configurations of model structure +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. 
Not for GRU.") +# configurations of data io +add_arg('test_manifest', str, + 'datasets/manifest.test', + "Filepath of manifest to evaluate.") +add_arg('mean_std_path', str, + 'mean_std.npz', + "Filepath of normalizer's mean & std.") +add_arg('vocab_path', str, + 'datasets/vocab/eng_vocab.txt', + "Filepath of vocabulary.") +# configurations of model io +add_arg('model_path', str, + './checkpoints/params.latest.tar.gz', + "If None, the training starts from scratch, " + "otherwise, it resumes from the pre-trained model.") args = parser.parse_args() +# yapf: disable def evaluate(): """Evaluate on whole test data for DeepSpeech2.""" data_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, + vocab_filepath=args.vocab_path, + mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=args.num_threads_data) + num_threads=args.parallels_data) batch_reader = data_generator.batch_reader_creator( - manifest_path=args.decode_manifest_path, + manifest_path=args.test_manifest, batch_size=args.batch_size, min_batch_size=1, sortagrad=False, @@ -155,7 +98,7 @@ def evaluate(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath, + pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) error_rate_func = cer if args.error_rate_type == 'cer' else wer @@ -163,14 +106,14 @@ def evaluate(): for infer_data in batch_reader(): result_transcripts = ds2_model.infer_batch( infer_data=infer_data, - decode_method=args.decode_method, + decoder_method=args.decoder_method, beam_alpha=args.alpha, beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, - language_model_path=args.language_model_path, - num_processes=args.num_processes_beam_search) + language_model_path=args.lang_model_path, + num_processes=args.parallels_bsearch) target_transcripts = [ ''.join([data_generator.vocab_list[token] for token in transcript]) for _, transcript in infer_data @@ -184,8 +127,15 @@ def evaluate(): (args.error_rate_type, num_ins, num_ins, error_sum / num_ins)) +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + def main(): - utils.print_arguments(args) + print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) evaluate() diff --git a/infer.py b/infer.py index 0c52ffc83..cf02808c1 100644 --- a/infer.py +++ b/infer.py @@ -10,140 +10,82 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer, cer -import utils +NUM_CPU = multiprocessing.cpu_count() // 2 parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--num_samples", - default=10, - type=int, - help="Number of samples for inference. (default: %(default)s)") -parser.add_argument( - "--num_conv_layers", - default=2, - type=int, - help="Convolution layer number. (default: %(default)s)") -parser.add_argument( - "--num_rnn_layers", - default=3, - type=int, - help="RNN layer number. (default: %(default)s)") -parser.add_argument( - "--rnn_layer_size", - default=2048, - type=int, - help="RNN layer cell number. 
(default: %(default)s)") -parser.add_argument( - "--share_rnn_weights", - default=True, - type=distutils.util.strtobool, - help="Whether to share input-hidden weights between forword and backward " - "directional simple RNNs. Only available when use_gru=False. " - "(default: %(default)s)") -parser.add_argument( - "--use_gru", - default=False, - type=distutils.util.strtobool, - help="Use GRU or simple RNN. (default: %(default)s)") -parser.add_argument( - "--use_gpu", - default=True, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") -parser.add_argument( - "--num_threads_data", - default=1, - type=int, - help="Number of cpu threads for preprocessing data. (default: %(default)s)") -parser.add_argument( - "--num_processes_beam_search", - default=multiprocessing.cpu_count() // 2, - type=int, - help="Number of cpu processes for beam search. (default: %(default)s)") -parser.add_argument( - "--specgram_type", - default='linear', - type=str, - help="Feature type of audio data: 'linear' (power spectrum)" - " or 'mfcc'. (default: %(default)s)") -parser.add_argument( - "--trainer_count", - default=8, - type=int, - help="Trainer number. (default: %(default)s)") -parser.add_argument( - "--mean_std_filepath", - default='mean_std.npz', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") -parser.add_argument( - "--decode_manifest_path", - default='datasets/manifest.test', - type=str, - help="Manifest path for decoding. (default: %(default)s)") -parser.add_argument( - "--model_filepath", - default='checkpoints/params.latest.tar.gz', - type=str, - help="Model filepath. (default: %(default)s)") -parser.add_argument( - "--vocab_filepath", - default='datasets/vocab/eng_vocab.txt', - type=str, - help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--decode_method", - default='beam_search', - type=str, - help="Method for ctc decoding: best_path or beam_search. " - "(default: %(default)s)") -parser.add_argument( - "--beam_size", - default=500, - type=int, - help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--language_model_path", - default="lm/data/common_crawl_00.prune01111.trie.klm", - type=str, - help="Path for language model. (default: %(default)s)") -parser.add_argument( - "--alpha", - default=0.36, - type=float, - help="Parameter associated with language model. (default: %(default)f)") -parser.add_argument( - "--beta", - default=0.25, - type=float, - help="Parameter associated with word count. (default: %(default)f)") -parser.add_argument( - "--cutoff_prob", - default=0.99, - type=float, - help="The cutoff probability of pruning" - "in beam search. (default: %(default)f)") -parser.add_argument( - "--error_rate_type", - default='wer', - choices=['wer', 'cer'], - type=str, - help="Error rate type for evaluation. 'wer' for word error rate and 'cer' " - "for character error rate. 
" - "(default: %(default)s)") + + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable +# configurations of overall +add_arg('num_samples', int, 10, "# of samples to infer.") +add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('error_rate_type', str, 'wer', "Error rate type for evaluation.", + choices=['wer', 'cer']) +# configurations of decoder +add_arg('beam_size', int, 500, "Beam search width.") +add_arg('alpha', float, 0.36, "Coef of LM for beam search.") +add_arg('beta', float, 0.25, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") +add_arg('decoder_method', str, + 'ctc_beam_search', + "Decoder method. Options: ctc_beam_search, ctc_greedy", + choices = ['ctc_beam_search', 'ctc_greedy']) +# configurations of data preprocess +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) +# configurations of model structure +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. 
Not for GRU.") +# configurations of data io +add_arg('infer_manifest', str, + 'datasets/manifest.dev', + "Filepath of manifest to infer.") +add_arg('mean_std_path', str, + 'mean_std.npz', + "Filepath of normalizer's mean & std.") +add_arg('vocab_path', str, + 'datasets/vocab/eng_vocab.txt', + "Filepath of vocabulary.") +# configurations of model io +add_arg('model_path', str, + './checkpoints/params.latest.tar.gz', + "If None, the training starts from scratch, " + "otherwise, it resumes from the pre-trained model.") args = parser.parse_args() +# yapf: disable def infer(): """Inference for DeepSpeech2.""" data_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, + vocab_filepath=args.vocab_path, + mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=args.num_threads_data) + num_threads=1) batch_reader = data_generator.batch_reader_creator( - manifest_path=args.decode_manifest_path, + manifest_path=args.infer_manifest, batch_size=args.num_samples, min_batch_size=1, sortagrad=False, @@ -156,18 +98,18 @@ def infer(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath, + pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) result_transcripts = ds2_model.infer_batch( infer_data=infer_data, - decode_method=args.decode_method, + decoder_method=args.decoder_method, beam_alpha=args.alpha, beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, - language_model_path=args.language_model_path, - num_processes=args.num_processes_beam_search) + language_model_path=args.lang_model_path, + num_processes=args.parallels_bsearch) error_rate_func = cer if args.error_rate_type == 'cer' else wer target_transcripts = [ @@ -181,8 +123,15 @@ def infer(): (args.error_rate_type, error_rate_func(target, result))) +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + def main(): - utils.print_arguments(args) + print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) infer() diff --git a/model.py b/model.py index 0234ed2d4..894605bfd 100644 --- a/model.py +++ b/model.py @@ -146,7 +146,7 @@ class DeepSpeech2Model(object): # run inference return self._loss_inferer.infer(input=infer_data) - def infer_batch(self, infer_data, decode_method, beam_alpha, beam_beta, + def infer_batch(self, infer_data, decoder_method, beam_alpha, beam_beta, beam_size, cutoff_prob, vocab_list, language_model_path, num_processes): """Model inference. Infer the transcription for a batch of speech @@ -156,9 +156,9 @@ class DeepSpeech2Model(object): consisting of a tuple of audio features and transcription text (empty string). :type infer_data: list - :param decode_method: Decoding method name, 'best_path' or - 'beam search'. - :param decode_method: string + :param decoder_method: Decoding method name, 'ctc_greedy' or + 'ctc_beam_search'. + :param decoder_method: string :param beam_alpha: Parameter associated with language model. :type beam_alpha: float :param beam_beta: Parameter associated with word count. 
@@ -190,13 +190,13 @@ class DeepSpeech2Model(object): ] # run decoder results = [] - if decode_method == "best_path": + if decoder_method == "ctc_greedy": # best path decode for i, probs in enumerate(probs_split): - output_transcription = ctc_best_path_decoder( + output_transcription = ctc_greedy_decoder( probs_seq=probs, vocabulary=vocab_list) results.append(output_transcription) - elif decode_method == "beam_search": + elif decoder_method == "ctc_beam_search": # initialize external scorer if self._ext_scorer == None: self._ext_scorer = LmScorer(beam_alpha, beam_beta, @@ -205,7 +205,6 @@ class DeepSpeech2Model(object): else: self._ext_scorer.reset_params(beam_alpha, beam_beta) assert self._loaded_lm_path == language_model_path - # beam search decode beam_search_results = ctc_beam_search_decoder_batch( probs_split=probs_split, @@ -218,8 +217,8 @@ class DeepSpeech2Model(object): results = [result[0][1] for result in beam_search_results] else: - raise ValueError("Decoding method [%s] is not supported." % - decode_method) + raise ValueError("Decoder method [%s] is not supported." % + decoder_method) return results def _create_parameters(self, model_path=None): diff --git a/train.py b/train.py index d055341f1..d21e6a3bd 100644 --- a/train.py +++ b/train.py @@ -9,169 +9,103 @@ import multiprocessing import paddle.v2 as paddle from model import DeepSpeech2Model from data_utils.data import DataGenerator -import utils +NUM_CPU = multiprocessing.cpu_count() // 2 parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--batch_size", default=256, type=int, help="Minibatch size.") -parser.add_argument( - "--num_passes", - default=200, - type=int, - help="Training pass number. (default: %(default)s)") -parser.add_argument( - "--num_iterations_print", - default=100, - type=int, - help="Number of iterations for every train cost printing. " - "(default: %(default)s)") -parser.add_argument( - "--num_conv_layers", - default=2, - type=int, - help="Convolution layer number. (default: %(default)s)") -parser.add_argument( - "--num_rnn_layers", - default=3, - type=int, - help="RNN layer number. (default: %(default)s)") -parser.add_argument( - "--rnn_layer_size", - default=2048, - type=int, - help="RNN layer cell number. (default: %(default)s)") -parser.add_argument( - "--share_rnn_weights", - default=True, - type=distutils.util.strtobool, - help="Whether to share input-hidden weights between forword and backward " - "directional simple RNNs. Only available when use_gru=False. " - "(default: %(default)s)") -parser.add_argument( - "--use_gru", - default=False, - type=distutils.util.strtobool, - help="Use GRU or simple RNN. (default: %(default)s)") -parser.add_argument( - "--adam_learning_rate", - default=5e-4, - type=float, - help="Learning rate for ADAM Optimizer. (default: %(default)s)") -parser.add_argument( - "--use_gpu", - default=True, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") -parser.add_argument( - "--use_sortagrad", - default=True, - type=distutils.util.strtobool, - help="Use sortagrad or not. (default: %(default)s)") -parser.add_argument( - "--specgram_type", - default='linear', - type=str, - help="Feature type of audio data: 'linear' (power spectrum)" - " or 'mfcc'. (default: %(default)s)") -parser.add_argument( - "--max_duration", - default=27.0, - type=float, - help="Audios with duration larger than this will be discarded. 
" - "(default: %(default)s)") -parser.add_argument( - "--min_duration", - default=0.0, - type=float, - help="Audios with duration smaller than this will be discarded. " - "(default: %(default)s)") -parser.add_argument( - "--shuffle_method", - default='batch_shuffle_clipped', - type=str, - help="Shuffle method: 'instance_shuffle', 'batch_shuffle', " - "'batch_shuffle_batch'. (default: %(default)s)") -parser.add_argument( - "--trainer_count", - default=8, - type=int, - help="Trainer number. (default: %(default)s)") -parser.add_argument( - "--num_threads_data", - default=multiprocessing.cpu_count() // 2, - type=int, - help="Number of cpu threads for preprocessing data. (default: %(default)s)") -parser.add_argument( - "--mean_std_filepath", - default='mean_std.npz', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") -parser.add_argument( - "--train_manifest_path", - default='datasets/manifest.train', - type=str, - help="Manifest path for training. (default: %(default)s)") -parser.add_argument( - "--dev_manifest_path", - default='datasets/manifest.dev', - type=str, - help="Manifest path for validation. (default: %(default)s)") -parser.add_argument( - "--vocab_filepath", - default='datasets/vocab/eng_vocab.txt', - type=str, - help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--init_model_path", - default=None, - type=str, - help="If set None, the training will start from scratch. " - "Otherwise, the training will resume from " - "the existing model of this path. (default: %(default)s)") -parser.add_argument( - "--output_model_dir", - default="./checkpoints", - type=str, - help="Directory for saving models. (default: %(default)s)") -parser.add_argument( - "--augmentation_config", - default=open('conf/augmentation.config', 'r').read(), - type=str, - help="Augmentation configuration in json-format. " - "(default: %(default)s)") -parser.add_argument( - "--is_local", - default=True, - type=distutils.util.strtobool, - help="Set to false if running with pserver in paddlecloud. " - "(default: %(default)s)") + + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable +# configurations of optimization +add_arg('batch_size', int, 256, "Minibatch size.") +add_arg('learning_rate', float, 5e-4, "Learning rate.") +add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.") +add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('num_passes', int, 200, "# of training epochs.") +add_arg('is_local', bool, True, "Use pserver or not.") +add_arg('num_iter_print', int, 100, "Every # iterations for printing " + "train cost.") +# configurations of data preprocess +add_arg('max_duration', float, 27.0, "Longest audio duration allowed.") +add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.") +add_arg('parallels_data', int, NUM_CPU,"# of CPUs for data preprocessing.") +add_arg('specgram_type', str, + 'linear', + "Audio feature type. 
Options: linear, mfcc.", + choices=['linear', 'mfcc']) +add_arg('augment_conf_path',str, + 'conf/augmentation.config', + "Filepath of augmentation configuration file (json-format).") +add_arg('shuffle_method', str, + 'batch_shuffle_clipped', + "Shuffle method.", + choices=['instance_shuffle', 'batch_shuffle', 'batch_shuffle_clipped']) +# configurations of model structure +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. Not for GRU.") +# configurations of data io +add_arg('train_manifest', str, + 'datasets/manifest.train', + "Filepath of train manifest.") +add_arg('dev_manifest', str, + 'datasets/manifest.dev', + "Filepath of validation manifest.") +add_arg('mean_std_path', str, + 'mean_std.npz', + "Filepath of normalizer's mean & std.") +add_arg('vocab_path', str, + 'datasets/vocab/eng_vocab.txt', + "Filepath of vocabulary.") +# configurations of model io +add_arg('init_model_path', str, + None, + "If None, the training starts from scratch, " + "otherwise, it resumes from the pre-trained model.") +add_arg('output_model_dir', str, + "./checkpoints", + "Directory for saving checkpoints.") args = parser.parse_args() +# yapf: disable def train(): """DeepSpeech2 training.""" train_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, - augmentation_config=args.augmentation_config, + vocab_filepath=args.vocab_path, + mean_std_filepath=args.mean_std_path, + augmentation_config=open(args.augment_conf_path, 'r').read(), max_duration=args.max_duration, min_duration=args.min_duration, specgram_type=args.specgram_type, - num_threads=args.num_threads_data) + num_threads=args.parallels_data) dev_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, + vocab_filepath=args.vocab_path, + mean_std_filepath=args.mean_std_path, augmentation_config="{}", specgram_type=args.specgram_type, - num_threads=args.num_threads_data) + num_threads=args.parallels_data) train_batch_reader = train_generator.batch_reader_creator( - manifest_path=args.train_manifest_path, + manifest_path=args.train_manifest, batch_size=args.batch_size, min_batch_size=args.trainer_count, sortagrad=args.use_sortagrad if args.init_model_path is None else False, shuffle_method=args.shuffle_method) dev_batch_reader = dev_generator.batch_reader_creator( - manifest_path=args.dev_manifest_path, + manifest_path=args.dev_manifest, batch_size=args.batch_size, min_batch_size=1, # must be 1, but will have errors. 
sortagrad=False, @@ -184,21 +118,28 @@ def train(): rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, pretrained_model_path=args.init_model_path, - share_rnn_weights=args.share_rnn_weights) + share_rnn_weights=args.share_weights) ds2_model.train( train_batch_reader=train_batch_reader, dev_batch_reader=dev_batch_reader, feeding_dict=train_generator.feeding, - learning_rate=args.adam_learning_rate, + learning_rate=args.learning_rate, gradient_clipping=400, num_passes=args.num_passes, - num_iterations_print=args.num_iterations_print, + num_iterations_print=args.num_iter_print, output_model_dir=args.output_model_dir, is_local=args.is_local) +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + def main(): - utils.print_arguments(args) + print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) train() diff --git a/tune.py b/tune.py index d8001339e..eac7ccd30 100644 --- a/tune.py +++ b/tune.py @@ -1,4 +1,4 @@ -"""Parameters tuning for DeepSpeech2 model.""" +"""Beam search parameters tuning for DeepSpeech2 model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -11,134 +11,71 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer -import utils +NUM_CPU = multiprocessing.cpu_count() // 2 parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--num_samples", - default=100, - type=int, - help="Number of samples for parameters tuning. (default: %(default)s)") -parser.add_argument( - "--num_conv_layers", - default=2, - type=int, - help="Convolution layer number. (default: %(default)s)") -parser.add_argument( - "--num_rnn_layers", - default=3, - type=int, - help="RNN layer number. (default: %(default)s)") -parser.add_argument( - "--rnn_layer_size", - default=2048, - type=int, - help="RNN layer cell number. (default: %(default)s)") -parser.add_argument( - "--share_rnn_weights", - default=True, - type=distutils.util.strtobool, - help="Whether to share input-hidden weights between forword and backward " - "directional simple RNNs. Only available when use_gru=False. " - "(default: %(default)s)") -parser.add_argument( - "--use_gru", - default=False, - type=distutils.util.strtobool, - help="Use GRU or simple RNN. (default: %(default)s)") -parser.add_argument( - "--use_gpu", - default=True, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") -parser.add_argument( - "--trainer_count", - default=8, - type=int, - help="Trainer number. (default: %(default)s)") -parser.add_argument( - "--num_threads_data", - default=1, - type=int, - help="Number of cpu threads for preprocessing data. (default: %(default)s)") -parser.add_argument( - "--num_processes_beam_search", - default=multiprocessing.cpu_count() // 2, - type=int, - help="Number of cpu processes for beam search. (default: %(default)s)") -parser.add_argument( - "--specgram_type", - default='linear', - type=str, - help="Feature type of audio data: 'linear' (power spectrum)" - " or 'mfcc'. (default: %(default)s)") -parser.add_argument( - "--mean_std_filepath", - default='mean_std.npz', - type=str, - help="Manifest path for normalizer. 
(default: %(default)s)") -parser.add_argument( - "--tune_manifest_path", - default='datasets/manifest.dev', - type=str, - help="Manifest path for tuning. (default: %(default)s)") -parser.add_argument( - "--model_filepath", - default='checkpoints/params.latest.tar.gz', - type=str, - help="Model filepath. (default: %(default)s)") -parser.add_argument( - "--vocab_filepath", - default='datasets/vocab/eng_vocab.txt', - type=str, - help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--beam_size", - default=500, - type=int, - help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--language_model_path", - default="lm/data/common_crawl_00.prune01111.trie.klm", - type=str, - help="Path for language model. (default: %(default)s)") -parser.add_argument( - "--alpha_from", - default=0.1, - type=float, - help="Where alpha starts from. (default: %(default)f)") -parser.add_argument( - "--num_alphas", - default=14, - type=int, - help="Number of candidate alphas. (default: %(default)d)") -parser.add_argument( - "--alpha_to", - default=0.36, - type=float, - help="Where alpha ends with. (default: %(default)f)") -parser.add_argument( - "--beta_from", - default=0.05, - type=float, - help="Where beta starts from. (default: %(default)f)") -parser.add_argument( - "--num_betas", - default=20, - type=float, - help="Number of candidate betas. (default: %(default)d)") -parser.add_argument( - "--beta_to", - default=1.0, - type=float, - help="Where beta ends with. (default: %(default)f)") -parser.add_argument( - "--cutoff_prob", - default=0.99, - type=float, - help="The cutoff probability of pruning" - "in beam search. (default: %(default)f)") + + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable +# configurations of overall +add_arg('num_samples', int, 100, "# of samples to infer.") +add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('error_rate_type', str, 'wer', "Error rate type for evaluation.", + choices=['wer', 'cer']) +# configurations of tuning parameters +add_arg('alpha_from', float, 0.1, "Where alpha starts tuning from.") +add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.") +add_arg('num_alphas', int, 14, "# of alpha candidates for tuning.") +add_arg('beta_from', float, 0.05, "Where beta starts tuning from.") +add_arg('beta_to', float, 0.36, "Where beta ends tuning with.") +add_arg('num_betas', int, 20, "# of beta candidates for tuning.") +# configurations of decoder +add_arg('beam_size', int, 500, "Beam search width.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") +# configurations of data preprocess +add_arg('specgram_type', str, + 'linear', + "Audio feature type. 
Options: linear, mfcc.", + choices=['linear', 'mfcc']) +# configurations of model structure +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. Not for GRU.") +# configurations of data io +add_arg('tune_manifest', str, + 'datasets/manifest.test', + "Filepath of manifest to tune.") +add_arg('mean_std_path', str, + 'mean_std.npz', + "Filepath of normalizer's mean & std.") +add_arg('vocab_path', str, + 'datasets/vocab/eng_vocab.txt', + "Filepath of vocabulary.") +# configurations of model io +add_arg('model_path', str, + './checkpoints/params.latest.tar.gz', + "If None, the training starts from scratch, " + "otherwise, it resumes from the pre-trained model.") args = parser.parse_args() +# yapf: disable def tune(): @@ -149,13 +86,13 @@ def tune(): raise ValueError("num_betas must be non-negative!") data_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, + vocab_filepath=args.vocab_path, + mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=args.num_threads_data) + num_threads=1) batch_reader = data_generator.batch_reader_creator( - manifest_path=args.tune_manifest_path, + manifest_path=args.tune_manifest, batch_size=args.num_samples, sortagrad=False, shuffle_method=None) @@ -171,7 +108,7 @@ def tune(): num_rnn_layers=args.num_rnn_layers, rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, - pretrained_model_path=args.model_filepath, + pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) # create grid for search @@ -184,14 +121,14 @@ def tune(): for alpha, beta in params_grid: result_transcripts = ds2_model.infer_batch( infer_data=tune_data, - decode_method='beam_search', + decoder_method='ctc_beam_search', beam_alpha=alpha, beam_beta=beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, - language_model_path=args.language_model_path, - num_processes=args.num_processes_beam_search) + language_model_path=args.lang_model_path, + num_processes=args.parallels_bsearch) wer_sum, num_ins = 0.0, 0 for target, result in zip(target_transcripts, result_transcripts): wer_sum += wer(target, result) @@ -200,8 +137,15 @@ def tune(): (alpha, beta, wer_sum / num_ins)) +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + def main(): - utils.print_arguments(args) + print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) tune() diff --git a/utils.py b/utils.py deleted file mode 100644 index 1d51e2042..000000000 --- a/utils.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Contains common utility functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. 
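The construction of params_grid used by the tuning loop above sits outside the hunks shown; a plausible sketch of how the (alpha, beta) candidates could be laid out from the new arguments, assuming an evenly spaced grid built with np.linspace and a cartesian product (names here are illustrative):

import itertools
import numpy as np

def make_params_grid(alpha_from, alpha_to, num_alphas,
                     beta_from, beta_to, num_betas):
    """Evenly spaced (alpha, beta) candidates for the LM and word-count weights."""
    cand_alphas = np.linspace(alpha_from, alpha_to, num_alphas)
    cand_betas = np.linspace(beta_from, beta_to, num_betas)
    return list(itertools.product(cand_alphas, cand_betas))

# e.g. make_params_grid(0.1, 0.36, 14, 0.05, 0.36, 20) yields 14 * 20 pairs,
# matching the default alpha/beta ranges configured above.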
code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----- Configuration Arguments -----") - for arg, value in vars(args).iteritems(): - print("%s: %s" % (arg, value)) - print("------------------------------------") From dfd7652308972a2de02cdcdfb5d71e8ebf98c5df Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 5 Sep 2017 00:38:30 +0800 Subject: [PATCH 165/335] Rename ctc_best_path_decoder to ctc_greedy_decoder in unitest. --- tests/test_decoders.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_decoders.py b/tests/test_decoders.py index 99d8a8289..fa43879b8 100644 --- a/tests/test_decoders.py +++ b/tests/test_decoders.py @@ -49,16 +49,16 @@ class TestDecoders(unittest.TestCase): 0.15882358, 0.1235788, 0.23376776, 0.20510435, 0.00279306, 0.05294827, 0.22298418 ]] - self.best_path_result = ["ac'bdc", "b'da"] + self.greedy_result = ["ac'bdc", "b'da"] self.beam_search_result = ['acdc', "b'a"] - def test_best_path_decoder_1(self): - bst_result = ctc_best_path_decoder(self.probs_seq1, self.vocab_list) - self.assertEqual(bst_result, self.best_path_result[0]) + def test_greedy_decoder_1(self): + bst_result = ctc_greedy_decoder(self.probs_seq1, self.vocab_list) + self.assertEqual(bst_result, self.greedy_result[0]) - def test_best_path_decoder_2(self): - bst_result = ctc_best_path_decoder(self.probs_seq2, self.vocab_list) - self.assertEqual(bst_result, self.best_path_result[1]) + def test_greedy_decoder_2(self): + bst_result = ctc_greedy_decoder(self.probs_seq2, self.vocab_list) + self.assertEqual(bst_result, self.greedy_result[1]) def test_beam_search_decoder_1(self): beam_result = ctc_beam_search_decoder( From 792129166ab9c1a5380d6a20eebd33ac7b7b9766 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 5 Sep 2017 12:23:41 +0800 Subject: [PATCH 166/335] Sort the config lines to make it look better. --- demo_server.py | 50 ++++++++++++++++++++++---------------------------- evaluate.py | 42 +++++++++++++++++++----------------------- infer.py | 41 +++++++++++++++++++---------------------- train.py | 43 +++++++++++++++++++------------------------ tune.py | 45 ++++++++++++++++++++------------------------- 5 files changed, 99 insertions(+), 122 deletions(-) diff --git a/demo_server.py b/demo_server.py index d2afa49b4..5eed3d2e0 100644 --- a/demo_server.py +++ b/demo_server.py @@ -27,41 +27,25 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable -# configurations of overall add_arg('host_port', int, 8086, "Server's IP port.") -add_arg('host_ip', str, - 'localhost', - "Server's IP address.") -add_arg('speech_save_dir', str, - 'demo_cache', - "Directory to save demo audios.") -add_arg('use_gpu', bool, True, "Use GPU or not.") -# configurations of decoder add_arg('beam_size', int, 500, "Beam search width.") -add_arg('alpha', float, 0.36, "Coef of LM for beam search.") -add_arg('beta', float, 0.25, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") -add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', - "Filepath for language model.") -add_arg('decoder_method', str, - 'ctc_beam_search', - "Decoder method. 
Options: ctc_beam_search, ctc_greedy", - choices = ['ctc_beam_search', 'ctc_greedy']) -# configurations of data preprocess -add_arg('specgram_type', str, - 'linear', - "Audio feature type. Options: linear, mfcc.", - choices=['linear', 'mfcc']) -# configurations of model structure add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('alpha', float, 0.36, "Coef of LM for beam search.") +add_arg('beta', float, 0.25, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") -# configurations of data io -add_arg('warmup_manifest', str, +add_arg('host_ip', str, + 'localhost', + "Server's IP address.") +add_arg('speech_save_dir', str, + 'demo_cache', + "Directory to save demo audios.") +add_arg('warmup_manifest', str, 'datasets/manifest.test', "Filepath of manifest to warm up.") add_arg('mean_std_path', str, @@ -70,11 +54,21 @@ add_arg('mean_std_path', str, add_arg('vocab_path', str, 'datasets/vocab/eng_vocab.txt', "Filepath of vocabulary.") -# configurations of model io add_arg('model_path', str, './checkpoints/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") +add_arg('decoder_method', str, + 'ctc_beam_search', + "Decoder method. Options: ctc_beam_search, ctc_greedy", + choices = ['ctc_beam_search', 'ctc_greedy']) +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) args = parser.parse_args() # yapf: disable diff --git a/evaluate.py b/evaluate.py index 1adf42557..2c4127788 100644 --- a/evaluate.py +++ b/evaluate.py @@ -26,39 +26,21 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable -# configurations of overall add_arg('batch_size', int, 128, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") -add_arg('use_gpu', bool, True, "Use GPU or not.") -add_arg('error_rate_type', str, 'wer', "Error rate type for evaluation.", - choices=['wer', 'cer']) -# configurations of decoder add_arg('beam_size', int, 500, "Beam search width.") -add_arg('alpha', float, 0.36, "Coef of LM for beam search.") -add_arg('beta', float, 0.25, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") -add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', - "Filepath for language model.") -add_arg('decoder_method', str, - 'ctc_beam_search', - "Decoder method. Options: ctc_beam_search, ctc_greedy", - choices = ['ctc_beam_search', 'ctc_greedy']) -# configurations of data preprocess add_arg('parallels_data', int, NUM_CPU,"# of CPUs for data preprocessing.") -add_arg('specgram_type', str, - 'linear', - "Audio feature type. 
Options: linear, mfcc.", - choices=['linear', 'mfcc']) -# configurations of model structure add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('alpha', float, 0.36, "Coef of LM for beam search.") +add_arg('beta', float, 0.25, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") -# configurations of data io add_arg('test_manifest', str, 'datasets/manifest.test', "Filepath of manifest to evaluate.") @@ -68,11 +50,25 @@ add_arg('mean_std_path', str, add_arg('vocab_path', str, 'datasets/vocab/eng_vocab.txt', "Filepath of vocabulary.") -# configurations of model io add_arg('model_path', str, './checkpoints/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") +add_arg('decoder_method', str, + 'ctc_beam_search', + "Decoder method. Options: ctc_beam_search, ctc_greedy", + choices = ['ctc_beam_search', 'ctc_greedy']) +add_arg('error_rate_type', str, + 'wer', + "Error rate type for evaluation.", + choices=['wer', 'cer']) +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) args = parser.parse_args() # yapf: disable diff --git a/infer.py b/infer.py index cf02808c1..313f80c05 100644 --- a/infer.py +++ b/infer.py @@ -29,35 +29,18 @@ def add_arg(argname, type, default, help, **kwargs): # configurations of overall add_arg('num_samples', int, 10, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") -add_arg('use_gpu', bool, True, "Use GPU or not.") -add_arg('error_rate_type', str, 'wer', "Error rate type for evaluation.", - choices=['wer', 'cer']) -# configurations of decoder add_arg('beam_size', int, 500, "Beam search width.") -add_arg('alpha', float, 0.36, "Coef of LM for beam search.") -add_arg('beta', float, 0.25, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") -add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', - "Filepath for language model.") -add_arg('decoder_method', str, - 'ctc_beam_search', - "Decoder method. Options: ctc_beam_search, ctc_greedy", - choices = ['ctc_beam_search', 'ctc_greedy']) -# configurations of data preprocess -add_arg('specgram_type', str, - 'linear', - "Audio feature type. 
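The error_rate_type option switches between word error rate and character error rate; both are edit distances normalized by the reference length, computed over words for 'wer' and over characters for 'cer'. A minimal sketch of WER under that definition (a stand-alone illustration, not the project's error_rate module):

def edit_distance(ref, hyp):
    """Levenshtein distance between two token sequences."""
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        curr = [i] + [0] * len(hyp)
        for j, h in enumerate(hyp, 1):
            curr[j] = min(prev[j] + 1,              # deletion
                          curr[j - 1] + 1,          # insertion
                          prev[j - 1] + (r != h))   # substitution
        prev = curr
    return prev[-1]

def word_error_rate(reference, hypothesis):
    """Word-level edit distance divided by the number of reference words."""
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    return float(edit_distance(ref_words, hyp_words)) / len(ref_words)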
Options: linear, mfcc.", - choices=['linear', 'mfcc']) -# configurations of model structure add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('alpha', float, 0.36, "Coef of LM for beam search.") +add_arg('beta', float, 0.25, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") -# configurations of data io add_arg('infer_manifest', str, 'datasets/manifest.dev', "Filepath of manifest to infer.") @@ -67,11 +50,25 @@ add_arg('mean_std_path', str, add_arg('vocab_path', str, 'datasets/vocab/eng_vocab.txt', "Filepath of vocabulary.") -# configurations of model io +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") +add_arg('decoder_method', str, + 'ctc_beam_search', + "Decoder method. Options: ctc_beam_search, ctc_greedy", + choices = ['ctc_beam_search', 'ctc_greedy']) +add_arg('error_rate_type', str, + 'wer', + "Error rate type for evaluation.", + choices=['wer', 'cer']) +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) args = parser.parse_args() # yapf: disable diff --git a/train.py b/train.py index d21e6a3bd..3d658d279 100644 --- a/train.py +++ b/train.py @@ -25,39 +25,24 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable -# configurations of optimization add_arg('batch_size', int, 256, "Minibatch size.") -add_arg('learning_rate', float, 5e-4, "Learning rate.") -add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") -add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('num_passes', int, 200, "# of training epochs.") -add_arg('is_local', bool, True, "Use pserver or not.") -add_arg('num_iter_print', int, 100, "Every # iterations for printing " - "train cost.") -# configurations of data preprocess -add_arg('max_duration', float, 27.0, "Longest audio duration allowed.") -add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.") add_arg('parallels_data', int, NUM_CPU,"# of CPUs for data preprocessing.") -add_arg('specgram_type', str, - 'linear', - "Audio feature type. 
Options: linear, mfcc.", - choices=['linear', 'mfcc']) -add_arg('augment_conf_path',str, - 'conf/augmentation.config', - "Filepath of augmentation configuration file (json-format).") -add_arg('shuffle_method', str, - 'batch_shuffle_clipped', - "Shuffle method.", - choices=['instance_shuffle', 'batch_shuffle', 'batch_shuffle_clipped']) -# configurations of model structure add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('num_iter_print', int, 100, "Every # iterations for printing " + "train cost.") +add_arg('learning_rate', float, 5e-4, "Learning rate.") +add_arg('max_duration', float, 27.0, "Longest audio duration allowed.") +add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.") +add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('is_local', bool, True, "Use pserver or not.") add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") -# configurations of data io add_arg('train_manifest', str, 'datasets/manifest.train', "Filepath of train manifest.") @@ -70,7 +55,6 @@ add_arg('mean_std_path', str, add_arg('vocab_path', str, 'datasets/vocab/eng_vocab.txt', "Filepath of vocabulary.") -# configurations of model io add_arg('init_model_path', str, None, "If None, the training starts from scratch, " @@ -78,6 +62,17 @@ add_arg('init_model_path', str, add_arg('output_model_dir', str, "./checkpoints", "Directory for saving checkpoints.") +add_arg('augment_conf_path',str, + 'conf/augmentation.config', + "Filepath of augmentation configuration file (json-format).") +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) +add_arg('shuffle_method', str, + 'batch_shuffle_clipped', + "Shuffle method.", + choices=['instance_shuffle', 'batch_shuffle', 'batch_shuffle_clipped']) args = parser.parse_args() # yapf: disable diff --git a/tune.py b/tune.py index eac7ccd30..2fbe0b98f 100644 --- a/tune.py +++ b/tune.py @@ -27,40 +27,25 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable -# configurations of overall add_arg('num_samples', int, 100, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") -add_arg('use_gpu', bool, True, "Use GPU or not.") -add_arg('error_rate_type', str, 'wer', "Error rate type for evaluation.", - choices=['wer', 'cer']) -# configurations of tuning parameters -add_arg('alpha_from', float, 0.1, "Where alpha starts tuning from.") -add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.") -add_arg('num_alphas', int, 14, "# of alpha candidates for tuning.") -add_arg('beta_from', float, 0.05, "Where beta starts tuning from.") -add_arg('beta_to', float, 0.36, "Where beta ends tuning with.") -add_arg('num_betas', int, 20, "# of beta candidates for tuning.") -# configurations of decoder add_arg('beam_size', int, 500, "Beam search width.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") -add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', - "Filepath for language model.") -# configurations of data preprocess -add_arg('specgram_type', str, - 'linear', - "Audio feature type. 
Options: linear, mfcc.", - choices=['linear', 'mfcc']) -# configurations of model structure add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('num_alphas', int, 14, "# of alpha candidates for tuning.") +add_arg('num_betas', int, 20, "# of beta candidates for tuning.") +add_arg('alpha_from', float, 0.1, "Where alpha starts tuning from.") +add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.") +add_arg('beta_from', float, 0.05, "Where beta starts tuning from.") +add_arg('beta_to', float, 0.36, "Where beta ends tuning with.") +add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") -# configurations of data io -add_arg('tune_manifest', str, +add_arg('tune_manifest', str, 'datasets/manifest.test', "Filepath of manifest to tune.") add_arg('mean_std_path', str, @@ -69,11 +54,21 @@ add_arg('mean_std_path', str, add_arg('vocab_path', str, 'datasets/vocab/eng_vocab.txt', "Filepath of vocabulary.") -# configurations of model io +add_arg('lang_model_path', str, + 'lm/data/common_crawl_00.prune01111.trie.klm', + "Filepath for language model.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") +add_arg('error_rate_type', str, + 'wer', + "Error rate type for evaluation.", + choices=['wer', 'cer']) +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) args = parser.parse_args() # yapf: disable From 8b64ef29c8810387bf6adadf2e9a0087bf3d4812 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 5 Sep 2017 12:48:11 +0800 Subject: [PATCH 167/335] Re-style the config codes for tools in DS2. 
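The tool scripts below adopt the same small helper the training and inference scripts already use: a local add_arg wrapper around argparse that appends the default value to the help text and routes boolean flags through distutils.util.strtobool (with plain type=bool, argparse would turn any non-empty string, including "False", into True), plus a print_arguments dump of the parsed namespace. A condensed, self-contained sketch of that pattern:

import argparse
import distutils.util

parser = argparse.ArgumentParser(description=__doc__)

def add_arg(argname, type, default, help, **kwargs):
    # bool("False") is True, so boolean flags go through strtobool instead.
    type = distutils.util.strtobool if type == bool else type
    parser.add_argument(
        "--" + argname,
        default=default,
        type=type,
        help=help + ' Default: %(default)s.',
        **kwargs)

def print_arguments(args):
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(vars(args).items()):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")

add_arg('use_gpu',   bool, True, "Use GPU or not.")
add_arg('beam_size', int,  500,  "Beam search width.")
args = parser.parse_args()
print_arguments(args)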
--- demo_server.py | 3 +- evaluate.py | 9 ++--- infer.py | 8 ++--- tools/build_vocab.py | 50 ++++++++++++++++++---------- tools/compute_mean_std.py | 70 ++++++++++++++++++++------------------- train.py | 7 ++-- tune.py | 8 ++--- 7 files changed, 79 insertions(+), 76 deletions(-) diff --git a/demo_server.py b/demo_server.py index 5eed3d2e0..81b56f94b 100644 --- a/demo_server.py +++ b/demo_server.py @@ -13,8 +13,6 @@ from data_utils.data import DataGenerator from model import DeepSpeech2Model from data_utils.utils import read_manifest -parser = argparse.ArgumentParser(description=__doc__) - def add_arg(argname, type, default, help, **kwargs): type = distutils.util.strtobool if type == bool else type @@ -27,6 +25,7 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable +parser = argparse.ArgumentParser(description=__doc__) add_arg('host_port', int, 8086, "Server's IP port.") add_arg('beam_size', int, 500, "Beam search width.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") diff --git a/evaluate.py b/evaluate.py index 2c4127788..38204c572 100644 --- a/evaluate.py +++ b/evaluate.py @@ -5,15 +5,11 @@ from __future__ import print_function import distutils.util import argparse -import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer, cer -NUM_CPU = multiprocessing.cpu_count() // 2 -parser = argparse.ArgumentParser(description=__doc__) - def add_arg(argname, type, default, help, **kwargs): type = distutils.util.strtobool if type == bool else type @@ -26,11 +22,12 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable +parser = argparse.ArgumentParser(description=__doc__) add_arg('batch_size', int, 128, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") -add_arg('parallels_data', int, NUM_CPU,"# of CPUs for data preprocessing.") +add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.") +add_arg('parallels_data', int, 12, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") diff --git a/infer.py b/infer.py index 313f80c05..e08cb1ca4 100644 --- a/infer.py +++ b/infer.py @@ -5,15 +5,11 @@ from __future__ import print_function import argparse import distutils.util -import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer, cer -NUM_CPU = multiprocessing.cpu_count() // 2 -parser = argparse.ArgumentParser(description=__doc__) - def add_arg(argname, type, default, help, **kwargs): type = distutils.util.strtobool if type == bool else type @@ -26,11 +22,11 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable -# configurations of overall +parser = argparse.ArgumentParser(description=__doc__) add_arg('num_samples', int, 10, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") +add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of 
recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") diff --git a/tools/build_vocab.py b/tools/build_vocab.py index 618f24985..f6cf6b9f3 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -14,26 +14,31 @@ import os.path import _init_paths from data_utils import utils + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--manifest_paths", - type=str, - help="Manifest paths for building vocabulary." - "You can provide multiple manifest files.", - nargs='+', - required=True) -parser.add_argument( - "--count_threshold", - default=0, - type=int, - help="Characters whose counts are below the threshold will be truncated. " - "(default: %(default)i)") -parser.add_argument( - "--vocab_path", - default='datasets/vocab/zh_vocab.txt', - type=str, - help="File path to write the vocabulary. (default: %(default)s)") +add_arg('count_threshold', int, 0, "Truncation threshold for char counts.") +add_arg('vocab_path', str, + 'datasets/vocab/zh_vocab.txt', + "Filepath to write the vocabulary.") +add_arg('manifest_paths', str, + None, + "Filepaths of manifests for building vocabulary. " + "You can provide multiple manifest files.", + nargs='+', + required=True) args = parser.parse_args() +# yapf: disable def count_manifest(counter, manifest_path): @@ -43,7 +48,16 @@ def count_manifest(counter, manifest_path): counter.update(char) +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + def main(): + print_arguments(args) + counter = Counter() for manifest_path in args.manifest_paths: count_manifest(counter, manifest_path) diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py index da49eb4c0..913a4334d 100644 --- a/tools/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -9,43 +9,45 @@ from data_utils.normalizer import FeatureNormalizer from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.audio_featurizer import AudioFeaturizer -parser = argparse.ArgumentParser( - description='Computing mean and stddev for feature normalizer.') -parser.add_argument( - "--specgram_type", - default='linear', - type=str, - help="Feature type of audio data: 'linear' (power spectrum)" - " or 'mfcc'. (default: %(default)s)") -parser.add_argument( - "--manifest_path", - default='datasets/manifest.train', - type=str, - help="Manifest path for computing normalizer's mean and stddev." - "(default: %(default)s)") -parser.add_argument( - "--num_samples", - default=2000, - type=int, - help="Number of samples for computing mean and stddev. " - "(default: %(default)s)") -parser.add_argument( - "--augmentation_config", - default='{}', - type=str, - help="Augmentation configuration in json-format. " - "(default: %(default)s)") -parser.add_argument( - "--output_file", - default='mean_std.npz', - type=str, - help="Filepath to write mean and std to (.npz)." 
- "(default: %(default)s)") + +def add_arg(argname, type, default, help, **kwargs): + type = distutils.util.strtobool if type == bool else type + parser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) + + +# yapf: disable +parser = argparse.ArgumentParser(description=__doc__) +add_arg('num_samples', int, 2000, "# of samples to for statistics.") +add_arg('specgram_type', str, + 'linear', + "Audio feature type. Options: linear, mfcc.", + choices=['linear', 'mfcc']) +add_arg('manifest_path', str, + 'datasets/manifest.train', + "Filepath of manifest to compute normalizer's mean and stddev.") +add_arg('output_path', str, + 'mean_std.npz', + "Filepath of write mean and stddev to (.npz).") args = parser.parse_args() +# yapf: disable + + +def print_arguments(args): + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") def main(): - augmentation_pipeline = AugmentationPipeline(args.augmentation_config) + print_arguments(args) + + augmentation_pipeline = AugmentationPipeline('{}') audio_featurizer = AudioFeaturizer(specgram_type=args.specgram_type) def augment_and_featurize(audio_segment): @@ -57,7 +59,7 @@ def main(): manifest_path=args.manifest_path, featurize_func=augment_and_featurize, num_samples=args.num_samples) - normalizer.write_to_file(args.output_file) + normalizer.write_to_file(args.output_path) if __name__ == '__main__': diff --git a/train.py b/train.py index 3d658d279..bd00d21d3 100644 --- a/train.py +++ b/train.py @@ -5,14 +5,10 @@ from __future__ import print_function import argparse import distutils.util -import multiprocessing import paddle.v2 as paddle from model import DeepSpeech2Model from data_utils.data import DataGenerator -NUM_CPU = multiprocessing.cpu_count() // 2 -parser = argparse.ArgumentParser(description=__doc__) - def add_arg(argname, type, default, help, **kwargs): type = distutils.util.strtobool if type == bool else type @@ -25,10 +21,11 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable +parser = argparse.ArgumentParser(description=__doc__) add_arg('batch_size', int, 256, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('num_passes', int, 200, "# of training epochs.") -add_arg('parallels_data', int, NUM_CPU,"# of CPUs for data preprocessing.") +add_arg('parallels_data', int, 12, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") diff --git a/tune.py b/tune.py index 2fbe0b98f..e066596c7 100644 --- a/tune.py +++ b/tune.py @@ -6,15 +6,11 @@ from __future__ import print_function import numpy as np import distutils.util import argparse -import multiprocessing import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer -NUM_CPU = multiprocessing.cpu_count() // 2 -parser = argparse.ArgumentParser(description=__doc__) - def add_arg(argname, type, default, help, **kwargs): type = distutils.util.strtobool if type == bool else type @@ -27,10 +23,11 @@ def add_arg(argname, type, default, help, **kwargs): # yapf: disable +parser = argparse.ArgumentParser(description=__doc__) add_arg('num_samples', int, 100, "# of samples to infer.") 
add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('parallels_bsearch',int, NUM_CPU,"# of CPUs for beam search.") +add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") @@ -73,6 +70,7 @@ args = parser.parse_args() # yapf: disable + def tune(): """Tune parameters alpha and beta on one minibatch.""" if not args.num_alphas >= 0: From 9571b6fc0e186a14d10c4b464b8e65883d2ced4b Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 5 Sep 2017 14:23:27 +0800 Subject: [PATCH 168/335] Add back utils.py. --- demo_server.py | 26 +++++----------------- evaluate.py | 26 +++++----------------- infer.py | 26 +++++----------------- tools/build_vocab.py | 25 +++++---------------- tools/compute_mean_std.py | 25 +++++---------------- train.py | 26 +++++----------------- tune.py | 27 +++++----------------- utils.py | 47 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 82 insertions(+), 146 deletions(-) create mode 100644 utils.py diff --git a/demo_server.py b/demo_server.py index 81b56f94b..6b73971a6 100644 --- a/demo_server.py +++ b/demo_server.py @@ -3,7 +3,7 @@ import os import time import random import argparse -import distutils.util +import functools from time import gmtime, strftime import SocketServer import struct @@ -12,20 +12,11 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from data_utils.utils import read_manifest +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('host_port', int, 8086, "Server's IP port.") add_arg('beam_size', int, 500, "Beam search width.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") @@ -68,8 +59,8 @@ add_arg('specgram_type', str, 'linear', "Audio feature type. 
Options: linear, mfcc.", choices=['linear', 'mfcc']) -args = parser.parse_args() # yapf: disable +args = parser.parse_args() class AsrTCPServer(SocketServer.TCPServer): @@ -198,13 +189,6 @@ def start_server(): server.serve_forever() -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=1) diff --git a/evaluate.py b/evaluate.py index 38204c572..35888f82d 100644 --- a/evaluate.py +++ b/evaluate.py @@ -3,26 +3,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import distutils.util import argparse +import functools import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer, cer +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('batch_size', int, 128, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") @@ -66,8 +57,8 @@ add_arg('specgram_type', str, 'linear', "Audio feature type. Options: linear, mfcc.", choices=['linear', 'mfcc']) -args = parser.parse_args() # yapf: disable +args = parser.parse_args() def evaluate(): @@ -120,13 +111,6 @@ def evaluate(): (args.error_rate_type, num_ins, num_ins, error_sum / num_ins)) -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) diff --git a/infer.py b/infer.py index e08cb1ca4..9d4bff849 100644 --- a/infer.py +++ b/infer.py @@ -4,25 +4,16 @@ from __future__ import division from __future__ import print_function import argparse -import distutils.util +import functools import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer, cer +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('num_samples', int, 10, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") @@ -65,8 +56,8 @@ add_arg('specgram_type', str, 'linear', "Audio feature type. 
Options: linear, mfcc.", choices=['linear', 'mfcc']) -args = parser.parse_args() # yapf: disable +args = parser.parse_args() def infer(): @@ -116,13 +107,6 @@ def infer(): (args.error_rate_type, error_rate_func(target, result))) -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) diff --git a/tools/build_vocab.py b/tools/build_vocab.py index f6cf6b9f3..ac6003026 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -7,26 +7,18 @@ from __future__ import division from __future__ import print_function import argparse +import functools import codecs import json from collections import Counter import os.path import _init_paths from data_utils import utils +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('count_threshold', int, 0, "Truncation threshold for char counts.") add_arg('vocab_path', str, 'datasets/vocab/zh_vocab.txt', @@ -37,8 +29,8 @@ add_arg('manifest_paths', str, "You can provide multiple manifest files.", nargs='+', required=True) -args = parser.parse_args() # yapf: disable +args = parser.parse_args() def count_manifest(counter, manifest_path): @@ -48,13 +40,6 @@ def count_manifest(counter, manifest_path): counter.update(char) -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - def main(): print_arguments(args) diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py index 913a4334d..9f7bf06ce 100644 --- a/tools/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -4,24 +4,16 @@ from __future__ import division from __future__ import print_function import argparse +import functools import _init_paths from data_utils.normalizer import FeatureNormalizer from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.audio_featurizer import AudioFeaturizer +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('num_samples', int, 2000, "# of samples to for statistics.") add_arg('specgram_type', str, 'linear', @@ -33,15 +25,8 @@ add_arg('manifest_path', str, add_arg('output_path', str, 'mean_std.npz', "Filepath of write mean and stddev to (.npz).") -args = parser.parse_args() # yapf: disable - - -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - 
print("------------------------------------------------") +args = parser.parse_args() def main(): diff --git a/train.py b/train.py index bd00d21d3..966e1d9b6 100644 --- a/train.py +++ b/train.py @@ -4,24 +4,15 @@ from __future__ import division from __future__ import print_function import argparse -import distutils.util +import functools import paddle.v2 as paddle from model import DeepSpeech2Model from data_utils.data import DataGenerator +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('batch_size', int, 256, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('num_passes', int, 200, "# of training epochs.") @@ -70,8 +61,8 @@ add_arg('shuffle_method', str, 'batch_shuffle_clipped', "Shuffle method.", choices=['instance_shuffle', 'batch_shuffle', 'batch_shuffle_clipped']) -args = parser.parse_args() # yapf: disable +args = parser.parse_args() def train(): @@ -123,13 +114,6 @@ def train(): is_local=args.is_local) -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) diff --git a/tune.py b/tune.py index e066596c7..62e8f2884 100644 --- a/tune.py +++ b/tune.py @@ -4,26 +4,17 @@ from __future__ import division from __future__ import print_function import numpy as np -import distutils.util import argparse +import functools import paddle.v2 as paddle from data_utils.data import DataGenerator from model import DeepSpeech2Model from error_rate import wer +from utils import add_arguments, print_arguments - -def add_arg(argname, type, default, help, **kwargs): - type = distutils.util.strtobool if type == bool else type - parser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -# yapf: disable parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable add_arg('num_samples', int, 100, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") @@ -66,9 +57,8 @@ add_arg('specgram_type', str, 'linear', "Audio feature type. 
Options: linear, mfcc.", choices=['linear', 'mfcc']) -args = parser.parse_args() # yapf: disable - +args = parser.parse_args() def tune(): @@ -130,13 +120,6 @@ def tune(): (alpha, beta, wer_sum / num_ins)) -def print_arguments(args): - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) diff --git a/utils.py b/utils.py new file mode 100644 index 000000000..2e489ade6 --- /dev/null +++ b/utils.py @@ -0,0 +1,47 @@ +"""Contains common utility functions.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import distutils.util + + +def print_arguments(args): + """Print argparse's arguments. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + parser.add_argument("name", default="Jonh", type=str, help="User name.") + args = parser.parse_args() + print_arguments(args) + + :param args: Input argparse.Namespace for printing. + :type args: argparse.Namespace + """ + print("----------- Configuration Arguments -----------") + for arg, value in sorted(vars(args).iteritems()): + print("%s: %s" % (arg, value)) + print("------------------------------------------------") + + +def add_arguments(argname, type, default, help, argparser, **kwargs): + """Add argparse's argument. + + Usage: + + .. code-block:: python + + parser = argparse.ArgumentParser() + add_argument("name", str, "Jonh", "User name.", parser) + args = parser.parse_args() + """ + type = distutils.util.strtobool if type == bool else type + argparser.add_argument( + "--" + argname, + default=default, + type=type, + help=help + ' Default: %(default)s.', + **kwargs) From e8f7a8fde1cbe78e3695c49804d9bbe8e305826d Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 5 Sep 2017 16:30:42 +0800 Subject: [PATCH 169/335] Update argument naming following Yibing's reviews. --- demo_server.py | 8 ++++---- evaluate.py | 16 ++++++++-------- infer.py | 12 ++++++------ model.py | 16 ++++++++-------- train.py | 8 ++++---- tune.py | 8 ++++---- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/demo_server.py b/demo_server.py index 6b73971a6..7cbee1fd4 100644 --- a/demo_server.py +++ b/demo_server.py @@ -25,7 +25,7 @@ add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") add_arg('alpha', float, 0.36, "Coef of LM for beam search.") add_arg('beta', float, 0.25, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") -add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") @@ -51,9 +51,9 @@ add_arg('model_path', str, add_arg('lang_model_path', str, 'lm/data/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") -add_arg('decoder_method', str, +add_arg('decoding_method', str, 'ctc_beam_search', - "Decoder method. Options: ctc_beam_search, ctc_greedy", + "Decoding method. 
Options: ctc_beam_search, ctc_greedy", choices = ['ctc_beam_search', 'ctc_greedy']) add_arg('specgram_type', str, 'linear', @@ -160,7 +160,7 @@ def start_server(): feature = data_generator.process_utterance(filename, "") result_transcript = ds2_model.infer_batch( infer_data=[feature], - decoder_method=args.decoder_method, + decoding_method=args.decoding_method, beam_alpha=args.alpha, beam_beta=args.beta, beam_size=args.beam_size, diff --git a/evaluate.py b/evaluate.py index 35888f82d..1cc307dad 100644 --- a/evaluate.py +++ b/evaluate.py @@ -17,15 +17,15 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('batch_size', int, 128, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.") -add_arg('parallels_data', int, 12, "# of CPUs for data preprocessing.") +add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") +add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") add_arg('alpha', float, 0.36, "Coef of LM for beam search.") add_arg('beta', float, 0.25, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") -add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") @@ -45,9 +45,9 @@ add_arg('model_path', str, add_arg('lang_model_path', str, 'lm/data/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") -add_arg('decoder_method', str, +add_arg('decoding_method', str, 'ctc_beam_search', - "Decoder method. Options: ctc_beam_search, ctc_greedy", + "Decoding method. 
Options: ctc_beam_search, ctc_greedy", choices = ['ctc_beam_search', 'ctc_greedy']) add_arg('error_rate_type', str, 'wer', @@ -68,7 +68,7 @@ def evaluate(): mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=args.parallels_data) + num_threads=args.num_proc_data) batch_reader = data_generator.batch_reader_creator( manifest_path=args.test_manifest, batch_size=args.batch_size, @@ -90,14 +90,14 @@ def evaluate(): for infer_data in batch_reader(): result_transcripts = ds2_model.infer_batch( infer_data=infer_data, - decoder_method=args.decoder_method, + decoding_method=args.decoding_method, beam_alpha=args.alpha, beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, language_model_path=args.lang_model_path, - num_processes=args.parallels_bsearch) + num_processes=args.num_proc_bsearch) target_transcripts = [ ''.join([data_generator.vocab_list[token] for token in transcript]) for _, transcript in infer_data diff --git a/infer.py b/infer.py index 9d4bff849..3fd835b46 100644 --- a/infer.py +++ b/infer.py @@ -17,14 +17,14 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('num_samples', int, 10, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.") +add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") add_arg('alpha', float, 0.36, "Coef of LM for beam search.") add_arg('beta', float, 0.25, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") -add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") @@ -44,9 +44,9 @@ add_arg('model_path', str, './checkpoints/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") -add_arg('decoder_method', str, +add_arg('decoding_method', str, 'ctc_beam_search', - "Decoder method. Options: ctc_beam_search, ctc_greedy", + "Decoding method. 
Options: ctc_beam_search, ctc_greedy", choices = ['ctc_beam_search', 'ctc_greedy']) add_arg('error_rate_type', str, 'wer', @@ -86,14 +86,14 @@ def infer(): share_rnn_weights=args.share_rnn_weights) result_transcripts = ds2_model.infer_batch( infer_data=infer_data, - decoder_method=args.decoder_method, + decoding_method=args.decoding_method, beam_alpha=args.alpha, beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, language_model_path=args.lang_model_path, - num_processes=args.parallels_bsearch) + num_processes=args.num_proc_bsearch) error_rate_func = cer if args.error_rate_type == 'cer' else wer target_transcripts = [ diff --git a/model.py b/model.py index 894605bfd..06f692906 100644 --- a/model.py +++ b/model.py @@ -146,7 +146,7 @@ class DeepSpeech2Model(object): # run inference return self._loss_inferer.infer(input=infer_data) - def infer_batch(self, infer_data, decoder_method, beam_alpha, beam_beta, + def infer_batch(self, infer_data, decoding_method, beam_alpha, beam_beta, beam_size, cutoff_prob, vocab_list, language_model_path, num_processes): """Model inference. Infer the transcription for a batch of speech @@ -156,9 +156,9 @@ class DeepSpeech2Model(object): consisting of a tuple of audio features and transcription text (empty string). :type infer_data: list - :param decoder_method: Decoding method name, 'ctc_greedy' or - 'ctc_beam_search'. - :param decoder_method: string + :param decoding_method: Decoding method name, 'ctc_greedy' or + 'ctc_beam_search'. + :param decoding_method: string :param beam_alpha: Parameter associated with language model. :type beam_alpha: float :param beam_beta: Parameter associated with word count. @@ -190,13 +190,13 @@ class DeepSpeech2Model(object): ] # run decoder results = [] - if decoder_method == "ctc_greedy": + if decoding_method == "ctc_greedy": # best path decode for i, probs in enumerate(probs_split): output_transcription = ctc_greedy_decoder( probs_seq=probs, vocabulary=vocab_list) results.append(output_transcription) - elif decoder_method == "ctc_beam_search": + elif decoding_method == "ctc_beam_search": # initialize external scorer if self._ext_scorer == None: self._ext_scorer = LmScorer(beam_alpha, beam_beta, @@ -217,8 +217,8 @@ class DeepSpeech2Model(object): results = [result[0][1] for result in beam_search_results] else: - raise ValueError("Decoder method [%s] is not supported." % - decoder_method) + raise ValueError("Decoding method [%s] is not supported." 
% + decoding_method) return results def _create_parameters(self, model_path=None): diff --git a/train.py b/train.py index 966e1d9b6..7cef7539b 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,7 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('batch_size', int, 256, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('num_passes', int, 200, "# of training epochs.") -add_arg('parallels_data', int, 12, "# of CPUs for data preprocessing.") +add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") @@ -28,7 +28,7 @@ add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.") add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('is_local', bool, True, "Use pserver or not.") -add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") add_arg('train_manifest', str, @@ -74,13 +74,13 @@ def train(): max_duration=args.max_duration, min_duration=args.min_duration, specgram_type=args.specgram_type, - num_threads=args.parallels_data) + num_threads=args.num_proc_data) dev_generator = DataGenerator( vocab_filepath=args.vocab_path, mean_std_filepath=args.mean_std_path, augmentation_config="{}", specgram_type=args.specgram_type, - num_threads=args.parallels_data) + num_threads=args.num_proc_data) train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest, batch_size=args.batch_size, diff --git a/tune.py b/tune.py index 62e8f2884..eab00cfdb 100644 --- a/tune.py +++ b/tune.py @@ -18,7 +18,7 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('num_samples', int, 100, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.") +add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") @@ -29,7 +29,7 @@ add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.") add_arg('beta_from', float, 0.05, "Where beta starts tuning from.") add_arg('beta_to', float, 0.36, "Where beta ends tuning with.") add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") -add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. 
Not for GRU.") @@ -104,14 +104,14 @@ def tune(): for alpha, beta in params_grid: result_transcripts = ds2_model.infer_batch( infer_data=tune_data, - decoder_method='ctc_beam_search', + decoding_method='ctc_beam_search', beam_alpha=alpha, beam_beta=beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, vocab_list=data_generator.vocab_list, language_model_path=args.lang_model_path, - num_processes=args.parallels_bsearch) + num_processes=args.num_proc_bsearch) wer_sum, num_ins = 0.0, 0 for target, result in zip(target_transcripts, result_transcripts): wer_sum += wer(target, result) From 0bbb9c3ee21e48a215ab226d6963077b3ab4a336 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 5 Sep 2017 23:50:41 +0800 Subject: [PATCH 170/335] Re-organize folder structure and hierarchy for DS2. --- README.md | 0 cloud/README.md | 0 cloud/pcloud_submit.sh | 8 +++-- cloud/pcloud_train.sh | 32 +++++++++++++++---- cloud/pcloud_upload_data.sh | 9 ++++-- .../vocab => data/librispeech}/eng_vocab.txt | 0 {datasets => data}/librispeech/librispeech.py | 0 {datasets => data}/noise/chime3_background.py | 0 data_utils/augmentor/impulse_response.py | 9 +++--- data_utils/augmentor/noise_perturb.py | 9 +++--- data_utils/data.py | 4 +-- data_utils/featurizer/audio_featurizer.py | 2 +- data_utils/normalizer.py | 4 +-- data_utils/{utils.py => utility.py} | 0 datasets/run_all.sh | 13 -------- datasets/run_noise.sh | 10 ------ deploy/_init_paths.py | 19 +++++++++++ demo_client.py => deploy/demo_client.py | 0 demo_server.py => deploy/demo_server.py | 11 ++++--- evaluate.py | 12 +++---- examples/librispeech/generate.sh | 28 ++++++++++++++++ examples/librispeech/prepare_data.sh | 32 +++++++++++++++++++ examples/librispeech/run_test.sh | 28 ++++++++++++++++ examples/librispeech/run_train.sh | 30 +++++++++++++++++ examples/librispeech/run_tune.sh | 30 +++++++++++++++++ infer.py | 12 +++---- models/__init__.py | 0 model.py => models/model.py | 8 ++--- layer.py => models/network.py | 21 ++++++------ tools/build_vocab.py | 4 +-- tools/compute_mean_std.py | 2 +- tune.py => tools/tune.py | 15 +++++---- train.py | 16 +++++----- utils/__init__.py | 0 decoder.py => utils/decoder.py | 0 error_rate.py => utils/error_rate.py | 0 utils.py => utils/utility.py | 0 37 files changed, 269 insertions(+), 99 deletions(-) mode change 100755 => 100644 README.md mode change 100755 => 100644 cloud/README.md rename {datasets/vocab => data/librispeech}/eng_vocab.txt (100%) rename {datasets => data}/librispeech/librispeech.py (100%) rename {datasets => data}/noise/chime3_background.py (100%) rename data_utils/{utils.py => utility.py} (100%) delete mode 100644 datasets/run_all.sh delete mode 100644 datasets/run_noise.sh create mode 100644 deploy/_init_paths.py rename demo_client.py => deploy/demo_client.py (100%) rename demo_server.py => deploy/demo_server.py (96%) create mode 100644 examples/librispeech/generate.sh create mode 100644 examples/librispeech/prepare_data.sh create mode 100644 examples/librispeech/run_test.sh create mode 100644 examples/librispeech/run_train.sh create mode 100644 examples/librispeech/run_tune.sh create mode 100644 models/__init__.py rename model.py => models/model.py (98%) rename layer.py => models/network.py (95%) rename tune.py => tools/tune.py (93%) create mode 100644 utils/__init__.py rename decoder.py => utils/decoder.py (100%) rename error_rate.py => utils/error_rate.py (100%) rename utils.py => utils/utility.py (100%) diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/cloud/README.md 
b/cloud/README.md old mode 100755 new mode 100644 diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 3c9a1c260..378a7c6e6 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,7 +1,9 @@ -TRAIN_MANIFEST="cloud/cloud.manifest.train" -DEV_MANIFEST="cloud/cloud.manifest.dev" +#! /usr/bin/bash + +TRAIN_MANIFEST="cloud/cloud_manifests/cloud.manifest.train" +DEV_MANIFEST="cloud/cloud_manifests/cloud.manifest.dev" CLOUD_MODEL_DIR="./checkpoints" -BATCH_SIZE=256 +BATCH_SIZE=512 NUM_GPU=8 NUM_NODE=1 IS_LOCAL="True" diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index 75949574d..d04132f90 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -1,3 +1,5 @@ +#! /usr/bin/bash + TRAIN_MANIFEST=$1 DEV_MANIFEST=$2 MODEL_PATH=$3 @@ -14,11 +16,29 @@ python ./cloud/split_data.py \ --out_manifest_path='/local.manifest.dev' python -u train.py \ ---batch_size=$BATCH_SIZE \ ---use_gpu=1 \ +--batch_size=${BATCH_SIZE} \ --trainer_count=${NUM_GPU} \ ---num_threads_data=${NUM_GPU} \ +--num_passes=200 \ +--num_proc_data=${NUM_GPU} \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--num_iter_print=100 \ +--learning_rate=5e-4 \ +--max_duration=27.0 \ +--min_duration=0.0 \ +--use_sortagrad=True \ +--use_gru=False \ +--use_gpu=True \ --is_local=${IS_LOCAL} \ ---train_manifest_path='/local.manifest.train' \ ---dev_manifest_path='/local.manifest.dev' \ ---output_model_dir=${MODEL_PATH} 2>&1 | tee ./log/train.log +--share_rnn_weights=True \ +--train_manifest='/local.manifest.train' \ +--dev_manifest='/local.manifest.dev' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--output_model_dir='./checkpoints' \ +--output_model_dir=${MODEL_PATH} \ +--augment_conf_path='conf/augmentation.config' \ +--specgram_type='linear' \ +--shuffle_method='batch_shuffle_clipped' \ +2>&1 | tee ./log/train.log diff --git a/cloud/pcloud_upload_data.sh b/cloud/pcloud_upload_data.sh index 97a0ab181..4ef235ef7 100644 --- a/cloud/pcloud_upload_data.sh +++ b/cloud/pcloud_upload_data.sh @@ -1,5 +1,9 @@ -IN_MANIFESTS="../datasets/manifest.train ../datasets/manifest.dev ../datasets/manifest.test" -OUT_MANIFESTS="./cloud.manifest.train ./cloud.manifest.dev ./cloud.manifest.test" +#! /usr/bin/bash + +mkdir cloud_manifests + +IN_MANIFESTS="../data/librispeech/manifest.train ../data/librispeech/manifest.dev-clean ../data/librispeech/manifest.test-clean" +OUT_MANIFESTS="cloud_manifests/cloud.manifest.train cloud_manifests/cloud.manifest.dev cloud_manifests/cloud.manifest.test" CLOUD_DATA_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/data/librispeech" NUM_SHARDS=50 @@ -14,4 +18,5 @@ then echo "Upload Data Failed!" exit 1 fi + echo "All Done." 
diff --git a/datasets/vocab/eng_vocab.txt b/data/librispeech/eng_vocab.txt similarity index 100% rename from datasets/vocab/eng_vocab.txt rename to data/librispeech/eng_vocab.txt diff --git a/datasets/librispeech/librispeech.py b/data/librispeech/librispeech.py similarity index 100% rename from datasets/librispeech/librispeech.py rename to data/librispeech/librispeech.py diff --git a/datasets/noise/chime3_background.py b/data/noise/chime3_background.py similarity index 100% rename from datasets/noise/chime3_background.py rename to data/noise/chime3_background.py diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py index c3de0fdbb..536b4d6a4 100644 --- a/data_utils/augmentor/impulse_response.py +++ b/data_utils/augmentor/impulse_response.py @@ -4,23 +4,22 @@ from __future__ import division from __future__ import print_function from data_utils.augmentor.base import AugmentorBase -from data_utils import utils +from data_utils.utility import read_manifest from data_utils.audio import AudioSegment class ImpulseResponseAugmentor(AugmentorBase): """Augmentation model for adding impulse response effect. - + :param rng: Random generator object. :type rng: random.Random :param impulse_manifest_path: Manifest path for impulse audio data. - :type impulse_manifest_path: basestring + :type impulse_manifest_path: basestring """ def __init__(self, rng, impulse_manifest_path): self._rng = rng - self._impulse_manifest = utils.read_manifest( - manifest_path=impulse_manifest_path) + self._impulse_manifest = read_manifest(impulse_manifest_path) def transform_audio(self, audio_segment): """Add impulse response effect. diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index 281174af4..96e0ff4de 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -4,13 +4,13 @@ from __future__ import division from __future__ import print_function from data_utils.augmentor.base import AugmentorBase -from data_utils import utils +from data_utils.utility import read_manifest from data_utils.audio import AudioSegment class NoisePerturbAugmentor(AugmentorBase): """Augmentation model for adding background noise. - + :param rng: Random generator object. :type rng: random.Random :param min_snr_dB: Minimal signal noise ratio, in decibels. @@ -18,15 +18,14 @@ class NoisePerturbAugmentor(AugmentorBase): :param max_snr_dB: Maximal signal noise ratio, in decibels. :type max_snr_dB: float :param noise_manifest_path: Manifest path for noise audio data. - :type noise_manifest_path: basestring + :type noise_manifest_path: basestring """ def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path): self._min_snr_dB = min_snr_dB self._max_snr_dB = max_snr_dB self._rng = rng - self._noise_manifest = utils.read_manifest( - manifest_path=noise_manifest_path) + self._noise_manifest = read_manifest(manifest_path=noise_manifest_path) def transform_audio(self, audio_segment): """Add background noise audio. 
diff --git a/data_utils/data.py b/data_utils/data.py index 33fcadc7b..8bff6826d 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -11,7 +11,7 @@ import multiprocessing import numpy as np import paddle.v2 as paddle from threading import local -from data_utils import utils +from data_utils.utility import read_manifest from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.speech_featurizer import SpeechFeaturizer from data_utils.speech import SpeechSegment @@ -159,7 +159,7 @@ class DataGenerator(object): def batch_reader(): # read manifest - manifest = utils.read_manifest( + manifest = read_manifest( manifest_path=manifest_path, max_duration=self._max_duration, min_duration=self._min_duration) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 39f453017..12f8784a9 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -4,7 +4,7 @@ from __future__ import division from __future__ import print_function import numpy as np -from data_utils import utils +from data_utils.utility import read_manifest from data_utils.audio import AudioSegment from python_speech_features import mfcc from python_speech_features import delta diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py index 1f4aae9a0..7c2e05c9d 100644 --- a/data_utils/normalizer.py +++ b/data_utils/normalizer.py @@ -5,7 +5,7 @@ from __future__ import print_function import numpy as np import random -import data_utils.utils as utils +from data_utils.utility import read_manifest from data_utils.audio import AudioSegment @@ -75,7 +75,7 @@ class FeatureNormalizer(object): def _compute_mean_std(self, manifest_path, featurize_func, num_samples): """Compute mean and std from randomly sampled instances.""" - manifest = utils.read_manifest(manifest_path) + manifest = read_manifest(manifest_path) sampled_manifest = self._rng.sample(manifest, num_samples) features = [] for instance in sampled_manifest: diff --git a/data_utils/utils.py b/data_utils/utility.py similarity index 100% rename from data_utils/utils.py rename to data_utils/utility.py diff --git a/datasets/run_all.sh b/datasets/run_all.sh deleted file mode 100644 index ef2b721fb..000000000 --- a/datasets/run_all.sh +++ /dev/null @@ -1,13 +0,0 @@ -cd librispeech -python librispeech.py -if [ $? -ne 0 ]; then - echo "Prepare LibriSpeech failed. Terminated." - exit 1 -fi -cd - - -cat librispeech/manifest.train* | shuf > manifest.train -cat librispeech/manifest.dev-clean > manifest.dev -cat librispeech/manifest.test-clean > manifest.test - -echo "All done." diff --git a/datasets/run_noise.sh b/datasets/run_noise.sh deleted file mode 100644 index 7b27abde4..000000000 --- a/datasets/run_noise.sh +++ /dev/null @@ -1,10 +0,0 @@ -cd noise -python chime3_background.py -if [ $? -ne 0 ]; then - echo "Prepare CHiME3 background noise failed. Terminated." - exit 1 -fi -cd - - -cat noise/manifest.* > manifest.noise -echo "All done." 
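With `data_utils/utils.py` renamed to `data_utils/utility.py`, `read_manifest` is imported from the new module name, as the updated call sites in `data_utils/data.py` and `data_utils/normalizer.py` above show. A minimal sketch of the new import, assuming the repository root is on `PYTHONPATH` and the LibriSpeech manifests have already been generated:

```
from data_utils.utility import read_manifest  # was: from data_utils import utils

manifest = read_manifest(manifest_path='data/librispeech/manifest.dev-clean')
print('%d utterances in manifest' % len(manifest))
```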
diff --git a/deploy/_init_paths.py b/deploy/_init_paths.py new file mode 100644 index 000000000..ddabb535b --- /dev/null +++ b/deploy/_init_paths.py @@ -0,0 +1,19 @@ +"""Set up paths for DS2""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path +import sys + + +def add_path(path): + if path not in sys.path: + sys.path.insert(0, path) + + +this_dir = os.path.dirname(__file__) + +# Add project path to PYTHONPATH +proj_path = os.path.join(this_dir, '..') +add_path(proj_path) diff --git a/demo_client.py b/deploy/demo_client.py similarity index 100% rename from demo_client.py rename to deploy/demo_client.py diff --git a/demo_server.py b/deploy/demo_server.py similarity index 96% rename from demo_server.py rename to deploy/demo_server.py index 7cbee1fd4..658b14197 100644 --- a/demo_server.py +++ b/deploy/demo_server.py @@ -9,10 +9,11 @@ import SocketServer import struct import wave import paddle.v2 as paddle +import _init_paths from data_utils.data import DataGenerator -from model import DeepSpeech2Model +from models.model import DeepSpeech2Model from data_utils.utils import read_manifest -from utils import add_arguments, print_arguments +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -36,13 +37,13 @@ add_arg('speech_save_dir', str, 'demo_cache', "Directory to save demo audios.") add_arg('warmup_manifest', str, - 'datasets/manifest.test', + 'data/librispeech/manifest.test-clean', "Filepath of manifest to warm up.") add_arg('mean_std_path', str, - 'mean_std.npz', + 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'datasets/vocab/eng_vocab.txt', + 'data/librispeech/eng_vocab.txt', "Filepath of vocabulary.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', diff --git a/evaluate.py b/evaluate.py index 1cc307dad..747e40df8 100644 --- a/evaluate.py +++ b/evaluate.py @@ -7,9 +7,9 @@ import argparse import functools import paddle.v2 as paddle from data_utils.data import DataGenerator -from model import DeepSpeech2Model -from error_rate import wer, cer -from utils import add_arguments, print_arguments +from models.model import DeepSpeech2Model +from utils.error_rate import wer, cer +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -30,13 +30,13 @@ add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") add_arg('test_manifest', str, - 'datasets/manifest.test', + 'data/librispeech/manifest.test-clean', "Filepath of manifest to evaluate.") add_arg('mean_std_path', str, - 'mean_std.npz', + 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'datasets/vocab/eng_vocab.txt', + 'data/librispeech/eng_vocab.txt', "Filepath of vocabulary.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', diff --git a/examples/librispeech/generate.sh b/examples/librispeech/generate.sh new file mode 100644 index 000000000..a34b7bc10 --- /dev/null +++ b/examples/librispeech/generate.sh @@ -0,0 +1,28 @@ +#! /usr/bin/bash + +pushd ../.. 
+ +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=500 \ +--num_proc_bsearch=12 \ +--num_proc_data=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--infer_manifest='data/librispeech/manifest.dev-clean' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--model_path='checkpoints/params.latest.tar.gz' \ +--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' diff --git a/examples/librispeech/prepare_data.sh b/examples/librispeech/prepare_data.sh new file mode 100644 index 000000000..162a38c49 --- /dev/null +++ b/examples/librispeech/prepare_data.sh @@ -0,0 +1,32 @@ +#! /usr/bin/bash + +pushd ../.. + +# download data, generate manifests +python data/librispeech/librispeech.py \ +--manifest_prefix='data/librispeech/manifest' \ +--full_download='True' \ +--target_dir='~/.cache/paddle/dataset/speech/Libri' + +if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." + exit 1 +fi + +#cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train + + +# compute mean and stddev for normalizer +python tools/compute_mean_std.py \ +--manifest_path='data/librispeech/manifest.train' \ +--num_samples=2000 \ +--specgram_type='linear' \ +--output_path='data/librispeech/mean_std.npz' + +if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." + exit 1 +fi + + +echo "LibriSpeech Data preparation done." diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh new file mode 100644 index 000000000..5a14cb682 --- /dev/null +++ b/examples/librispeech/run_test.sh @@ -0,0 +1,28 @@ +#! /usr/bin/bash + +pushd ../.. + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u evaluate.py \ +--batch_size=128 \ +--trainer_count=8 \ +--beam_size=500 \ +--num_proc_bsearch=12 \ +--num_proc_data=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--test_manifest='data/librispeech/manifest.test-clean' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--model_path='checkpoints/params.latest.tar.gz' \ +--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh new file mode 100644 index 000000000..832838a81 --- /dev/null +++ b/examples/librispeech/run_train.sh @@ -0,0 +1,30 @@ +#! /usr/bin/bash + +pushd ../.. 
+ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u train.py \ +--batch_size=256 \ +--trainer_count=8 \ +--num_passes=200 \ +--num_proc_data=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--num_iter_print=100 \ +--learning_rate=5e-4 \ +--max_duration=27.0 \ +--min_duration=0.0 \ +--use_sortagrad=True \ +--use_gru=False \ +--use_gpu=True \ +--is_local=True \ +--share_rnn_weights=True \ +--train_manifest='data/librispeech/manifest.train' \ +--dev_manifest='data/librispeech/manifest.dev' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--output_model_dir='./checkpoints' \ +--augment_conf_path='conf/augmentation.config' \ +--specgram_type='linear' \ +--shuffle_method='batch_shuffle_clipped' diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh new file mode 100644 index 000000000..9d992e884 --- /dev/null +++ b/examples/librispeech/run_tune.sh @@ -0,0 +1,30 @@ +#! /usr/bin/bash + +pushd ../.. + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u tools/tune.py \ +--num_samples=100 \ +--trainer_count=8 \ +--beam_size=500 \ +--num_proc_bsearch=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--num_alphas=14 \ +--num_betas=20 \ +--alpha_from=0.1 \ +--alpha_to=0.36 \ +--beta_from=0.05 \ +--beta_to=1.0 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--tune_manifest='data/librispeech/manifest.dev-clean' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--model_path='checkpoints/params.latest.tar.gz' \ +--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--error_rate_type='wer' \ +--specgram_type='linear' diff --git a/infer.py b/infer.py index 3fd835b46..1ce969ae0 100644 --- a/infer.py +++ b/infer.py @@ -7,9 +7,9 @@ import argparse import functools import paddle.v2 as paddle from data_utils.data import DataGenerator -from model import DeepSpeech2Model -from error_rate import wer, cer -from utils import add_arguments, print_arguments +from models.model import DeepSpeech2Model +from utils.error_rate import wer, cer +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -29,13 +29,13 @@ add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. 
Not for GRU.") add_arg('infer_manifest', str, - 'datasets/manifest.dev', + 'data/librispeech/manifest.dev-clean', "Filepath of manifest to infer.") add_arg('mean_std_path', str, - 'mean_std.npz', + 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'datasets/vocab/eng_vocab.txt', + 'data/librispeech/eng_vocab.txt', "Filepath of vocabulary.") add_arg('lang_model_path', str, 'lm/data/common_crawl_00.prune01111.trie.klm', diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/model.py b/models/model.py similarity index 98% rename from model.py rename to models/model.py index 06f692906..3e6fc328a 100644 --- a/model.py +++ b/models/model.py @@ -7,10 +7,10 @@ import sys import os import time import gzip -from decoder import * -from lm.lm_scorer import LmScorer import paddle.v2 as paddle -from layer import * +from utils.decoder import ctc_greedy_decoder, ctc_beam_search_decoder +from lm.lm_scorer import LmScorer +from models.network import deep_speech_v2_network class DeepSpeech2Model(object): @@ -241,7 +241,7 @@ class DeepSpeech2Model(object): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(vocab_size)) - self._log_probs, self._loss = deep_speech2( + self._log_probs, self._loss = deep_speech_v2_network( audio_data=audio_data, text_data=text_data, dict_size=vocab_size, diff --git a/layer.py b/models/network.py similarity index 95% rename from layer.py rename to models/network.py index b7ac3c23e..13ba5d2c9 100644 --- a/layer.py +++ b/models/network.py @@ -1,4 +1,4 @@ -"""Contains DeepSpeech2 layers.""" +"""Contains DeepSpeech2 layers and networks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -205,16 +205,15 @@ def rnn_group(input, size, num_stacks, use_gru, share_rnn_weights): return output -def deep_speech2(audio_data, - text_data, - dict_size, - num_conv_layers=2, - num_rnn_layers=3, - rnn_size=256, - use_gru=False, - share_rnn_weights=True): - """ - The whole DeepSpeech2 model structure. +def deep_speech_v2_network(audio_data, + text_data, + dict_size, + num_conv_layers=2, + num_rnn_layers=3, + rnn_size=256, + use_gru=False, + share_rnn_weights=True): + """The DeepSpeech2 network structure. :param audio_data: Audio spectrogram data layer. 
:type audio_data: LayerOutput diff --git a/tools/build_vocab.py b/tools/build_vocab.py index ac6003026..6fbb9bdfc 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -13,8 +13,8 @@ import json from collections import Counter import os.path import _init_paths -from data_utils import utils -from utils import add_arguments, print_arguments +from data_utils.utility import read_manifest +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py index 9f7bf06ce..5bb6be39d 100644 --- a/tools/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -9,7 +9,7 @@ import _init_paths from data_utils.normalizer import FeatureNormalizer from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.audio_featurizer import AudioFeaturizer -from utils import add_arguments, print_arguments +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) diff --git a/tune.py b/tools/tune.py similarity index 93% rename from tune.py rename to tools/tune.py index eab00cfdb..7a2379109 100644 --- a/tune.py +++ b/tools/tune.py @@ -7,10 +7,11 @@ import numpy as np import argparse import functools import paddle.v2 as paddle +import _init_paths from data_utils.data import DataGenerator -from model import DeepSpeech2Model -from error_rate import wer -from utils import add_arguments, print_arguments +from models.model import DeepSpeech2Model +from utils.error_rate import wer +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -27,20 +28,20 @@ add_arg('num_betas', int, 20, "# of beta candidates for tuning.") add_arg('alpha_from', float, 0.1, "Where alpha starts tuning from.") add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.") add_arg('beta_from', float, 0.05, "Where beta starts tuning from.") -add_arg('beta_to', float, 0.36, "Where beta ends tuning with.") +add_arg('beta_to', float, 1.0, "Where beta ends tuning with.") add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. 
Not for GRU.") add_arg('tune_manifest', str, - 'datasets/manifest.test', + 'data/librispeech/manifest.dev', "Filepath of manifest to tune.") add_arg('mean_std_path', str, - 'mean_std.npz', + 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'datasets/vocab/eng_vocab.txt', + 'data/librispeech/eng_vocab.txt', "Filepath of vocabulary.") add_arg('lang_model_path', str, 'lm/data/common_crawl_00.prune01111.trie.klm', diff --git a/train.py b/train.py index 7cef7539b..4a7a0eda2 100644 --- a/train.py +++ b/train.py @@ -6,9 +6,9 @@ from __future__ import print_function import argparse import functools import paddle.v2 as paddle -from model import DeepSpeech2Model +from models.model import DeepSpeech2Model from data_utils.data import DataGenerator -from utils import add_arguments, print_arguments +from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -27,21 +27,21 @@ add_arg('max_duration', float, 27.0, "Longest audio duration allowed.") add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.") add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.") add_arg('use_gpu', bool, True, "Use GPU or not.") -add_arg('is_local', bool, True, "Use pserver or not.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") +add_arg('is_local', bool, True, "Use pserver or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " "bi-directional RNNs. Not for GRU.") add_arg('train_manifest', str, - 'datasets/manifest.train', + 'data/librispeech/manifest.train', "Filepath of train manifest.") add_arg('dev_manifest', str, - 'datasets/manifest.dev', + 'data/librispeech/manifest.dev-clean', "Filepath of validation manifest.") add_arg('mean_std_path', str, - 'mean_std.npz', + 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'datasets/vocab/eng_vocab.txt', + 'data/librispeech/eng_vocab.txt', "Filepath of vocabulary.") add_arg('init_model_path', str, None, @@ -101,7 +101,7 @@ def train(): rnn_layer_size=args.rnn_layer_size, use_gru=args.use_gru, pretrained_model_path=args.init_model_path, - share_rnn_weights=args.share_weights) + share_rnn_weights=args.share_rnn_weights) ds2_model.train( train_batch_reader=train_batch_reader, dev_batch_reader=dev_batch_reader, diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/decoder.py b/utils/decoder.py similarity index 100% rename from decoder.py rename to utils/decoder.py diff --git a/error_rate.py b/utils/error_rate.py similarity index 100% rename from error_rate.py rename to utils/error_rate.py diff --git a/utils.py b/utils/utility.py similarity index 100% rename from utils.py rename to utils/utility.py From 5623b09868abe7fc81fb356b9e9f5453772ac7ef Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 6 Sep 2017 14:33:25 +0800 Subject: [PATCH 171/335] Move decoder.py to models and re-arrange unitests. 
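The decoders move from `utils/` to `models/`, so callers now use the `models.decoder` import path (see the updated import in `models/model.py` below). A minimal greedy-decoding sketch, assuming the repository root is on `PYTHONPATH`; the probabilities are dummy values for illustration only, with one row per time step and `len(vocabulary) + 1` columns (the blank is assumed to be the last index):

```
from models.decoder import ctc_greedy_decoder

vocab_list = ['a', 'b', 'c', ' ']
# Dummy posteriors: 2 time steps, len(vocab_list) + 1 columns (last = blank).
probs_seq = [[0.1, 0.1, 0.2, 0.1, 0.5],
             [0.6, 0.1, 0.1, 0.1, 0.1]]
# Best-path decoding returns a transcription string.
print(ctc_greedy_decoder(probs_seq=probs_seq, vocabulary=vocab_list))
```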
--- README.md | 2 ++ {utils => models}/decoder.py | 0 models/model.py | 2 +- {tests => models/tests}/test_decoders.py | 0 evaluate.py => test.py | 0 {tests => utils/tests}/test_error_rate.py | 0 6 files changed, 3 insertions(+), 1 deletion(-) rename {utils => models}/decoder.py (100%) rename {tests => models/tests}/test_decoders.py (100%) rename evaluate.py => test.py (100%) rename {tests => utils/tests}/test_error_rate.py (100%) diff --git a/README.md b/README.md index 4e8befa5b..db07d8c20 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # DeepSpeech2 on PaddlePaddle +>TODO: to be updated, since the directory hierarchy was changed. + ## Installation ``` diff --git a/utils/decoder.py b/models/decoder.py similarity index 100% rename from utils/decoder.py rename to models/decoder.py diff --git a/models/model.py b/models/model.py index 3e6fc328a..93c4c41bf 100644 --- a/models/model.py +++ b/models/model.py @@ -8,8 +8,8 @@ import os import time import gzip import paddle.v2 as paddle -from utils.decoder import ctc_greedy_decoder, ctc_beam_search_decoder from lm.lm_scorer import LmScorer +from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder from models.network import deep_speech_v2_network diff --git a/tests/test_decoders.py b/models/tests/test_decoders.py similarity index 100% rename from tests/test_decoders.py rename to models/tests/test_decoders.py diff --git a/evaluate.py b/test.py similarity index 100% rename from evaluate.py rename to test.py diff --git a/tests/test_error_rate.py b/utils/tests/test_error_rate.py similarity index 100% rename from tests/test_error_rate.py rename to utils/tests/test_error_rate.py From b2eb008a71a15166ae33746ea4c0d6029e3ab392 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 6 Sep 2017 14:40:11 +0800 Subject: [PATCH 172/335] Remove test_setup.py. 
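The deleted file only checked that `soundfile` round-trips a float WAV correctly; when debugging an installation, an equivalent sanity check can still be run by hand. A minimal sketch mirroring the removed test:

```
import os
import numpy as np
import soundfile as sf

# Write a small float WAV and read it back, as the removed test did.
data = np.array([[0.5, -0.5], [0.25, -0.25]])
sf.write('test.wav', data, 44100, format='WAV', subtype='FLOAT')
read, fs = sf.read('test.wav')
assert np.all(read == data) and fs == 44100
os.remove('test.wav')
```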
--- tests/test_setup.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 tests/test_setup.py diff --git a/tests/test_setup.py b/tests/test_setup.py deleted file mode 100644 index 18b9c1a0c..000000000 --- a/tests/test_setup.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test Setup.""" -import unittest -import numpy as np -import os - - -class TestSetup(unittest.TestCase): - def test_soundfile(self): - import soundfile as sf - # floating point data is typically limited to the interval [-1.0, 1.0], - # but smaller/larger values are supported as well - data = np.array([[1.75, -1.75], [1.0, -1.0], [0.5, -0.5], - [0.25, -0.25]]) - file = 'test.wav' - sf.write(file, data, 44100, format='WAV', subtype='FLOAT') - read, fs = sf.read(file) - self.assertTrue(np.all(read == data)) - self.assertEqual(fs, 44100) - os.remove(file) - - -if __name__ == '__main__': - unittest.main() From 5208b8e40f23a1677a4a9471343cfd64426103a1 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 6 Sep 2017 18:18:53 +0800 Subject: [PATCH 173/335] format C++ source code --- deploy/ctc_decoders.cpp | 592 +++++++++++++++++++-------------------- deploy/ctc_decoders.h | 44 ++- deploy/decoder_utils.cpp | 160 ++++++----- deploy/decoder_utils.h | 44 ++- deploy/path_trie.cpp | 209 +++++++------- deploy/path_trie.h | 62 ++-- deploy/scorer.cpp | 331 +++++++++++----------- deploy/scorer.h | 86 +++--- 8 files changed, 749 insertions(+), 779 deletions(-) diff --git a/deploy/ctc_decoders.cpp b/deploy/ctc_decoders.cpp index 4e94edfbb..cedb943ea 100644 --- a/deploy/ctc_decoders.cpp +++ b/deploy/ctc_decoders.cpp @@ -1,337 +1,329 @@ -#include -#include +#include "ctc_decoders.h" #include -#include #include +#include #include -#include "fst/fstlib.h" -#include "ctc_decoders.h" +#include +#include +#include "ThreadPool.h" #include "decoder_utils.h" +#include "fst/fstlib.h" #include "path_trie.h" -#include "ThreadPool.h" -std::string ctc_best_path_decoder(std::vector > probs_seq, - std::vector vocabulary) -{ - // dimension check - int num_time_steps = probs_seq.size(); - for (int i=0; i> probs_seq, + std::vector vocabulary) { + // dimension check + int num_time_steps = probs_seq.size(); + for (int i = 0; i < num_time_steps; i++) { + if (probs_seq[i].size() != vocabulary.size() + 1) { + std::cout << "The shape of probs_seq does not match" + << " with the shape of the vocabulary!" 
<< std::endl; + exit(1); } - - int blank_id = vocabulary.size(); - - std::vector max_idx_vec; - double max_prob = 0.0; - int max_idx = 0; - for (int i = 0; i < num_time_steps; i++) { - for (int j = 0; j < probs_seq[i].size(); j++) { - if (max_prob < probs_seq[i][j]) { - max_idx = j; - max_prob = probs_seq[i][j]; - } - } - max_idx_vec.push_back(max_idx); - max_prob = 0.0; - max_idx = 0; + } + + int blank_id = vocabulary.size(); + + std::vector max_idx_vec; + double max_prob = 0.0; + int max_idx = 0; + for (int i = 0; i < num_time_steps; i++) { + for (int j = 0; j < probs_seq[i].size(); j++) { + if (max_prob < probs_seq[i][j]) { + max_idx = j; + max_prob = probs_seq[i][j]; + } } - - std::vector idx_vec; - for (int i = 0; i < max_idx_vec.size(); i++) { - if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i-1])) { - idx_vec.push_back(max_idx_vec[i]); - } + max_idx_vec.push_back(max_idx); + max_prob = 0.0; + max_idx = 0; + } + + std::vector idx_vec; + for (int i = 0; i < max_idx_vec.size(); i++) { + if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { + idx_vec.push_back(max_idx_vec[i]); } + } - std::string best_path_result; - for (int i = 0; i < idx_vec.size(); i++) { - if (idx_vec[i] != blank_id) { - best_path_result += vocabulary[idx_vec[i]]; - } + std::string best_path_result; + for (int i = 0; i < idx_vec.size(); i++) { + if (idx_vec[i] != blank_id) { + best_path_result += vocabulary[idx_vec[i]]; } - return best_path_result; + } + return best_path_result; } -std::vector > - ctc_beam_search_decoder(std::vector > probs_seq, - int beam_size, - std::vector vocabulary, - int blank_id, - double cutoff_prob, - int cutoff_top_n, - Scorer *ext_scorer) -{ - // dimension check - int num_time_steps = probs_seq.size(); - for (int i = 0; i < num_time_steps; i++) { - if (probs_seq[i].size() != vocabulary.size() + 1) { - std::cout << " The shape of probs_seq does not match" - << " with the shape of the vocabulary!" << std::endl; - exit(1); - } +std::vector> ctc_beam_search_decoder( + std::vector> probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id, + double cutoff_prob, + int cutoff_top_n, + Scorer *extscorer) { + // dimension check + int num_time_steps = probs_seq.size(); + for (int i = 0; i < num_time_steps; i++) { + if (probs_seq[i].size() != vocabulary.size() + 1) { + std::cout << " The shape of probs_seq does not match" + << " with the shape of the vocabulary!" << std::endl; + exit(1); } - - // blank_id check - if (blank_id > vocabulary.size()) { - std::cout << " Invalid blank_id! " << std::endl; - exit(1); + } + + // blank_id check + if (blank_id > vocabulary.size()) { + std::cout << " Invalid blank_id! 
" << std::endl; + exit(1); + } + + // assign space ID + std::vector::iterator it = + std::find(vocabulary.begin(), vocabulary.end(), " "); + int space_id = it - vocabulary.begin(); + // if no space in vocabulary + if (space_id >= vocabulary.size()) { + space_id = -2; + } + + // init prefixes' root + PathTrie root; + root.score = root.log_prob_b_prev = 0.0; + std::vector prefixes; + prefixes.push_back(&root); + + if (extscorer != nullptr) { + if (extscorer->is_char_map_empty()) { + extscorer->set_char_map(vocabulary); } - - // assign space ID - std::vector::iterator it = std::find(vocabulary.begin(), - vocabulary.end(), " "); - int space_id = it - vocabulary.begin(); - // if no space in vocabulary - if(space_id >= vocabulary.size()) { - space_id = -2; + if (!extscorer->is_character_based()) { + if (extscorer->dictionary == nullptr) { + // fill dictionary for fst + extscorer->fill_dictionary(true); + } + auto fst_dict = static_cast(extscorer->dictionary); + fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); + root.set_dictionary(dict_ptr); + auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); + root.set_matcher(matcher); + } + } + + // prefix search over time + for (int time_step = 0; time_step < num_time_steps; time_step++) { + std::vector prob = probs_seq[time_step]; + std::vector> prob_idx; + for (int i = 0; i < prob.size(); i++) { + prob_idx.push_back(std::pair(i, prob[i])); } - // init prefixes' root - PathTrie root; - root._score = root._log_prob_b_prev = 0.0; - std::vector prefixes; - prefixes.push_back(&root); + float min_cutoff = -NUM_FLT_INF; + bool full_beam = false; + if (extscorer != nullptr) { + int num_prefixes = std::min((int)prefixes.size(), beam_size); + std::sort( + prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); + min_cutoff = prefixes[num_prefixes - 1]->score + log(prob[blank_id]) - + std::max(0.0, extscorer->beta); + full_beam = (num_prefixes == beam_size); + } - if ( ext_scorer != nullptr) { - if (ext_scorer->is_char_map_empty()) { - ext_scorer->set_char_map(vocabulary); - } - if (!ext_scorer->is_character_based()) { - if (ext_scorer->dictionary == nullptr) { - // fill dictionary for fst - ext_scorer->fill_dictionary(true); - } - auto fst_dict = static_cast - (ext_scorer->dictionary); - fst::StdVectorFst* dict_ptr = fst_dict->Copy(true); - root.set_dictionary(dict_ptr); - auto matcher = std::make_shared - (*dict_ptr, fst::MATCH_INPUT); - root.set_matcher(matcher); + // pruning of vacobulary + int cutoff_len = prob.size(); + if (cutoff_prob < 1.0 || cutoff_top_n < prob.size()) { + std::sort( + prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); + if (cutoff_prob < 1.0) { + double cum_prob = 0.0; + cutoff_len = 0; + for (int i = 0; i < prob_idx.size(); i++) { + cum_prob += prob_idx[i].second; + cutoff_len += 1; + if (cum_prob >= cutoff_prob) break; } + } + cutoff_len = std::min(cutoff_len, cutoff_top_n); + prob_idx = std::vector>( + prob_idx.begin(), prob_idx.begin() + cutoff_len); + } + std::vector> log_prob_idx; + for (int i = 0; i < cutoff_len; i++) { + log_prob_idx.push_back(std::pair( + prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); } - // prefix search over time - for (int time_step = 0; time_step < num_time_steps; time_step++) { - std::vector prob = probs_seq[time_step]; - std::vector > prob_idx; - for (int i=0; i(i, prob[i])); - } + // loop over chars + for (int index = 0; index < log_prob_idx.size(); index++) { + auto c = log_prob_idx[index].first; + float log_prob_c = log_prob_idx[index].second; - float 
min_cutoff = -NUM_FLT_INF; - bool full_beam = false; - if (ext_scorer != nullptr) { - int num_prefixes = std::min((int)prefixes.size(), beam_size); - std::sort(prefixes.begin(), prefixes.begin() + num_prefixes, - prefix_compare); - min_cutoff = prefixes[num_prefixes-1]->_score + log(prob[blank_id]) - - std::max(0.0, ext_scorer->beta); - full_beam = (num_prefixes == beam_size); - } - - // pruning of vacobulary - int cutoff_len = prob.size(); - if (cutoff_prob < 1.0 || cutoff_top_n < prob.size()) { - std::sort(prob_idx.begin(), - prob_idx.end(), - pair_comp_second_rev); - if (cutoff_prob < 1.0) { - double cum_prob = 0.0; - cutoff_len = 0; - for (int i=0; i= cutoff_prob) break; - } - } - cutoff_len = std::min(cutoff_len, cutoff_top_n); - prob_idx = std::vector >( prob_idx.begin(), - prob_idx.begin() + cutoff_len); - } - std::vector > log_prob_idx; - for (int i = 0; i < cutoff_len; i++) { - log_prob_idx.push_back(std::pair - (prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); - } + for (int i = 0; i < prefixes.size() && i < beam_size; i++) { + auto prefix = prefixes[i]; - // loop over chars - for (int index = 0; index < log_prob_idx.size(); index++) { - auto c = log_prob_idx[index].first; - float log_prob_c = log_prob_idx[index].second; - - for (int i = 0; i < prefixes.size() && i_score < min_cutoff) { - break; - } - // blank - if (c == blank_id) { - prefix->_log_prob_b_cur = log_sum_exp( - prefix->_log_prob_b_cur, - log_prob_c + prefix->_score); - continue; - } - // repeated character - if (c == prefix->_character) { - prefix->_log_prob_nb_cur = log_sum_exp( - prefix->_log_prob_nb_cur, - log_prob_c + prefix->_log_prob_nb_prev); - } - // get new prefix - auto prefix_new = prefix->get_path_trie(c); - - if (prefix_new != nullptr) { - float log_p = -NUM_FLT_INF; - - if (c == prefix->_character - && prefix->_log_prob_b_prev > -NUM_FLT_INF) { - log_p = log_prob_c + prefix->_log_prob_b_prev; - } else if (c != prefix->_character) { - log_p = log_prob_c + prefix->_score; - } - - // language model scoring - if (ext_scorer != nullptr && - (c == space_id || ext_scorer->is_character_based()) ) { - PathTrie *prefix_to_score = nullptr; - - // skip scoring the space - if (ext_scorer->is_character_based()) { - prefix_to_score = prefix_new; - } else { - prefix_to_score = prefix; - } - - double score = 0.0; - std::vector ngram; - ngram = ext_scorer->make_ngram(prefix_to_score); - score = ext_scorer->get_log_cond_prob(ngram) * - ext_scorer->alpha; - - log_p += score; - log_p += ext_scorer->beta; - } - prefix_new->_log_prob_nb_cur = log_sum_exp( - prefix_new->_log_prob_nb_cur, log_p); - } - } // end of loop over prefix - } // end of loop over chars - - prefixes.clear(); - // update log probs - root.iterate_to_vec(prefixes); - - // only preserve top beam_size prefixes - if (prefixes.size() >= beam_size) { - std::nth_element(prefixes.begin(), - prefixes.begin() + beam_size, - prefixes.end(), - prefix_compare); - - for (size_t i = beam_size; i < prefixes.size(); i++) { - prefixes[i]->remove(); - } + if (full_beam && log_prob_c + prefix->score < min_cutoff) { + break; } - } // end of loop over time - - // compute aproximate ctc score as the return score - for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { - double approx_ctc = prefixes[i]->_score; - - if (ext_scorer != nullptr) { - std::vector output; - prefixes[i]->get_path_vec(output); - size_t prefix_length = output.size(); - auto words = ext_scorer->split_labels(output); - // remove word insert - approx_ctc = approx_ctc - prefix_length * 
ext_scorer->beta; - // remove language model weight: - approx_ctc -= (ext_scorer->get_sent_log_prob(words)) - * ext_scorer->alpha; + // blank + if (c == blank_id) { + prefix->log_prob_b_cur = + log_sum_exp(prefix->log_prob_b_cur, log_prob_c + prefix->score); + continue; + } + // repeated character + if (c == prefix->character) { + prefix->log_prob_nb_cur = log_sum_exp( + prefix->log_prob_nb_cur, log_prob_c + prefix->log_prob_nb_prev); } + // get new prefix + auto prefix_new = prefix->get_path_trie(c); + + if (prefix_new != nullptr) { + float log_p = -NUM_FLT_INF; + + if (c == prefix->character && + prefix->log_prob_b_prev > -NUM_FLT_INF) { + log_p = log_prob_c + prefix->log_prob_b_prev; + } else if (c != prefix->character) { + log_p = log_prob_c + prefix->score; + } + + // language model scoring + if (extscorer != nullptr && + (c == space_id || extscorer->is_character_based())) { + PathTrie *prefix_toscore = nullptr; + + // skip scoring the space + if (extscorer->is_character_based()) { + prefix_toscore = prefix_new; + } else { + prefix_toscore = prefix; + } - prefixes[i]->_approx_ctc = approx_ctc; - } + double score = 0.0; + std::vector ngram; + ngram = extscorer->make_ngram(prefix_toscore); + score = extscorer->get_log_cond_prob(ngram) * extscorer->alpha; - // allow for the post processing - std::vector space_prefixes; - if (space_prefixes.empty()) { - for (size_t i = 0; i < beam_size && i< prefixes.size(); i++) { - space_prefixes.push_back(prefixes[i]); + log_p += score; + log_p += extscorer->beta; + } + prefix_new->log_prob_nb_cur = + log_sum_exp(prefix_new->log_prob_nb_cur, log_p); } + } // end of loop over prefix + } // end of loop over chars + + prefixes.clear(); + // update log probs + root.iterate_to_vec(prefixes); + + // only preserve top beam_size prefixes + if (prefixes.size() >= beam_size) { + std::nth_element(prefixes.begin(), + prefixes.begin() + beam_size, + prefixes.end(), + prefix_compare); + + for (size_t i = beam_size; i < prefixes.size(); i++) { + prefixes[i]->remove(); + } } - - std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); - std::vector > output_vecs; - for (size_t i = 0; i < beam_size && i < space_prefixes.size(); i++) { - std::vector output; - space_prefixes[i]->get_path_vec(output); - // convert index to string - std::string output_str; - for (int j = 0; j < output.size(); j++) { - output_str += vocabulary[output[j]]; - } - std::pair - output_pair(-space_prefixes[i]->_approx_ctc, output_str); - output_vecs.emplace_back(output_pair); + } // end of loop over time + + // compute aproximate ctc score as the return score + for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { + double approx_ctc = prefixes[i]->score; + + if (extscorer != nullptr) { + std::vector output; + prefixes[i]->get_path_vec(output); + size_t prefix_length = output.size(); + auto words = extscorer->split_labels(output); + // remove word insert + approx_ctc = approx_ctc - prefix_length * extscorer->beta; + // remove language model weight: + approx_ctc -= (extscorer->get_sent_log_prob(words)) * extscorer->alpha; } - return output_vecs; - } - - -std::vector > > - ctc_beam_search_decoder_batch( - std::vector>> probs_split, - int beam_size, - std::vector vocabulary, - int blank_id, - int num_processes, - double cutoff_prob, - int cutoff_top_n, - Scorer *ext_scorer - ) { - if (num_processes <= 0) { - std::cout << "num_processes must be nonnegative!" 
<< std::endl; - exit(1); + prefixes[i]->approx_ctc = approx_ctc; + } + + // allow for the post processing + std::vector space_prefixes; + if (space_prefixes.empty()) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { + space_prefixes.push_back(prefixes[i]); } - // thread pool - ThreadPool pool(num_processes); - // number of samples - int batch_size = probs_split.size(); - - // scorer filling up - if ( ext_scorer != nullptr) { - if (ext_scorer->is_char_map_empty()) { - ext_scorer->set_char_map(vocabulary); - } - if(!ext_scorer->is_character_based() - && ext_scorer->dictionary == nullptr) { - // init dictionary - ext_scorer->fill_dictionary(true); - } + } + + std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); + std::vector> output_vecs; + for (size_t i = 0; i < beam_size && i < space_prefixes.size(); i++) { + std::vector output; + space_prefixes[i]->get_path_vec(output); + // convert index to string + std::string output_str; + for (int j = 0; j < output.size(); j++) { + output_str += vocabulary[output[j]]; } + std::pair output_pair(-space_prefixes[i]->approx_ctc, + output_str); + output_vecs.emplace_back(output_pair); + } - // enqueue the tasks of decoding - std::vector>>> res; - for (int i = 0; i < batch_size; i++) { - res.emplace_back( - pool.enqueue(ctc_beam_search_decoder, probs_split[i], - beam_size, vocabulary, blank_id, cutoff_prob, - cutoff_top_n, ext_scorer) - ); - } + return output_vecs; +} - // get decoding results - std::vector > > batch_results; - for (int i = 0; i < batch_size; i++) { - batch_results.emplace_back(res[i].get()); +std::vector>> +ctc_beam_search_decoder_batch( + std::vector>> probs_split, + int beam_size, + std::vector vocabulary, + int blank_id, + int num_processes, + double cutoff_prob, + int cutoff_top_n, + Scorer *extscorer) { + if (num_processes <= 0) { + std::cout << "num_processes must be nonnegative!" << std::endl; + exit(1); + } + // thread pool + ThreadPool pool(num_processes); + // number of samples + int batch_size = probs_split.size(); + + // scorer filling up + if (extscorer != nullptr) { + if (extscorer->is_char_map_empty()) { + extscorer->set_char_map(vocabulary); + } + if (!extscorer->is_character_based() && + extscorer->dictionary == nullptr) { + // init dictionary + extscorer->fill_dictionary(true); } - return batch_results; + } + + // enqueue the tasks of decoding + std::vector>>> res; + for (int i = 0; i < batch_size; i++) { + res.emplace_back(pool.enqueue(ctc_beam_search_decoder, + probs_split[i], + beam_size, + vocabulary, + blank_id, + cutoff_prob, + cutoff_top_n, + extscorer)); + } + + // get decoding results + std::vector>> batch_results; + for (int i = 0; i < batch_size; i++) { + batch_results.emplace_back(res[i].get()); + } + return batch_results; } diff --git a/deploy/ctc_decoders.h b/deploy/ctc_decoders.h index 58d2b7895..78edefb77 100644 --- a/deploy/ctc_decoders.h +++ b/deploy/ctc_decoders.h @@ -1,9 +1,9 @@ #ifndef CTC_BEAM_SEARCH_DECODER_H_ #define CTC_BEAM_SEARCH_DECODER_H_ -#include #include #include +#include #include "scorer.h" /* CTC Best Path Decoder @@ -16,8 +16,8 @@ * A vector that each element is a pair of score and decoding result, * in desending order. 
*/ -std::string ctc_best_path_decoder(std::vector > probs_seq, - std::vector vocabulary); +std::string ctc_best_path_decoder(std::vector> probs_seq, + std::vector vocabulary); /* CTC Beam Search Decoder @@ -34,15 +34,14 @@ std::string ctc_best_path_decoder(std::vector > probs_seq, * A vector that each element is a pair of score and decoding result, * in desending order. */ -std::vector > - ctc_beam_search_decoder(std::vector > probs_seq, - int beam_size, - std::vector vocabulary, - int blank_id, - double cutoff_prob=1.0, - int cutoff_top_n=40, - Scorer *ext_scorer=NULL - ); +std::vector> ctc_beam_search_decoder( + std::vector> probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id, + double cutoff_prob = 1.0, + int cutoff_top_n = 40, + Scorer *ext_scorer = NULL); /* CTC Beam Search Decoder for batch data, the interface is consistent with the * original decoder in Python version. @@ -63,15 +62,14 @@ std::vector > * sample. */ std::vector>> - ctc_beam_search_decoder_batch(std::vector>> probs_split, - int beam_size, - std::vector vocabulary, - int blank_id, - int num_processes, - double cutoff_prob=1.0, - int cutoff_top_n=40, - Scorer *ext_scorer=NULL - ); - +ctc_beam_search_decoder_batch( + std::vector>> probs_split, + int beam_size, + std::vector vocabulary, + int blank_id, + int num_processes, + double cutoff_prob = 1.0, + int cutoff_top_n = 40, + Scorer *ext_scorer = NULL); -#endif // CTC_BEAM_SEARCH_DECODER_H_ +#endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/deploy/decoder_utils.cpp b/deploy/decoder_utils.cpp index 37674f71e..bed0f623f 100644 --- a/deploy/decoder_utils.cpp +++ b/deploy/decoder_utils.cpp @@ -1,113 +1,111 @@ -#include +#include "decoder_utils.h" #include #include -#include "decoder_utils.h" +#include size_t get_utf8_str_len(const std::string& str) { - size_t str_len = 0; - for (char c : str) { - str_len += ((c & 0xc0) != 0x80); - } - return str_len; + size_t str_len = 0; + for (char c : str) { + str_len += ((c & 0xc0) != 0x80); + } + return str_len; } -std::vector split_utf8_str(const std::string& str) -{ +std::vector split_utf8_str(const std::string& str) { std::vector result; std::string out_str; - for (char c : str) + for (char c : str) { + if ((c & 0xc0) != 0x80) // new UTF-8 character { - if ((c & 0xc0) != 0x80) //new UTF-8 character - { - if (!out_str.empty()) - { - result.push_back(out_str); - out_str.clear(); - } - } - - out_str.append(1, c); + if (!out_str.empty()) { + result.push_back(out_str); + out_str.clear(); + } } + + out_str.append(1, c); + } result.push_back(out_str); return result; } -std::vector split_str(const std::string &s, - const std::string &delim) { - std::vector result; - std::size_t start = 0, delim_len = delim.size(); - while (true) { - std::size_t end = s.find(delim, start); - if (end == std::string::npos) { - if (start < s.size()) { - result.push_back(s.substr(start)); - } - break; - } - if (end > start) { - result.push_back(s.substr(start, end - start)); - } - start = end + delim_len; +std::vector split_str(const std::string& s, + const std::string& delim) { + std::vector result; + std::size_t start = 0, delim_len = delim.size(); + while (true) { + std::size_t end = s.find(delim, start); + if (end == std::string::npos) { + if (start < s.size()) { + result.push_back(s.substr(start)); + } + break; + } + if (end > start) { + result.push_back(s.substr(start, end - start)); } - return result; + start = end + delim_len; + } + return result; } -bool prefix_compare(const PathTrie* x, const PathTrie* y) { - if (x->_score == y->_score) 
{ - if (x->_character == y->_character) { - return false; - } else { - return (x->_character < y->_character); - } +bool prefix_compare(const PathTrie* x, const PathTrie* y) { + if (x->score == y->score) { + if (x->character == y->character) { + return false; } else { - return x->_score > y->_score; + return (x->character < y->character); } + } else { + return x->score > y->score; + } } void add_word_to_fst(const std::vector& word, fst::StdVectorFst* dictionary) { - if (dictionary->NumStates() == 0) { - fst::StdVectorFst::StateId start = dictionary->AddState(); - assert(start == 0); - dictionary->SetStart(start); - } - fst::StdVectorFst::StateId src = dictionary->Start(); - fst::StdVectorFst::StateId dst; - for (auto c : word) { - dst = dictionary->AddState(); - dictionary->AddArc(src, fst::StdArc(c, c, 0, dst)); - src = dst; - } - dictionary->SetFinal(dst, fst::StdArc::Weight::One()); + if (dictionary->NumStates() == 0) { + fst::StdVectorFst::StateId start = dictionary->AddState(); + assert(start == 0); + dictionary->SetStart(start); + } + fst::StdVectorFst::StateId src = dictionary->Start(); + fst::StdVectorFst::StateId dst; + for (auto c : word) { + dst = dictionary->AddState(); + dictionary->AddArc(src, fst::StdArc(c, c, 0, dst)); + src = dst; + } + dictionary->SetFinal(dst, fst::StdArc::Weight::One()); } -bool add_word_to_dictionary(const std::string& word, - const std::unordered_map& char_map, - bool add_space, - int SPACE_ID, - fst::StdVectorFst* dictionary) { - auto characters = split_utf8_str(word); +bool add_word_to_dictionary( + const std::string& word, + const std::unordered_map& char_map, + bool add_space, + int SPACE_ID, + fst::StdVectorFst* dictionary) { + auto characters = split_utf8_str(word); - std::vector int_word; + std::vector int_word; - for (auto& c : characters) { - if (c == " ") { - int_word.push_back(SPACE_ID); - } else { - auto int_c = char_map.find(c); - if (int_c != char_map.end()) { - int_word.push_back(int_c->second); - } else { - return false; // return without adding - } - } + for (auto& c : characters) { + if (c == " ") { + int_word.push_back(SPACE_ID); + } else { + auto int_c = char_map.find(c); + if (int_c != char_map.end()) { + int_word.push_back(int_c->second); + } else { + return false; // return without adding + } } + } - if (add_space) { - int_word.push_back(SPACE_ID); - } + if (add_space) { + int_word.push_back(SPACE_ID); + } - add_word_to_fst(int_word, dictionary); - return true; + add_word_to_fst(int_word, dictionary); + return true; } diff --git a/deploy/decoder_utils.h b/deploy/decoder_utils.h index 829ea76d0..51985c86e 100644 --- a/deploy/decoder_utils.h +++ b/deploy/decoder_utils.h @@ -10,34 +10,31 @@ const float NUM_FLT_MIN = std::numeric_limits::min(); // Function template for comparing two pairs template bool pair_comp_first_rev(const std::pair &a, - const std::pair &b) -{ - return a.first > b.first; + const std::pair &b) { + return a.first > b.first; } template bool pair_comp_second_rev(const std::pair &a, - const std::pair &b) -{ - return a.second > b.second; + const std::pair &b) { + return a.second > b.second; } template -T log_sum_exp(const T &x, const T &y) -{ - static T num_min = -std::numeric_limits::max(); - if (x <= num_min) return y; - if (y <= num_min) return x; - T xmax = std::max(x, y); - return std::log(std::exp(x-xmax) + std::exp(y-xmax)) + xmax; +T log_sum_exp(const T &x, const T &y) { + static T num_min = -std::numeric_limits::max(); + if (x <= num_min) return y; + if (y <= num_min) return x; + T xmax = std::max(x, y); + 
return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; } // Functor for prefix comparsion -bool prefix_compare(const PathTrie* x, const PathTrie* y); +bool prefix_compare(const PathTrie *x, const PathTrie *y); // Get length of utf8 encoding string // See: http://stackoverflow.com/a/4063229 -size_t get_utf8_str_len(const std::string& str); +size_t get_utf8_str_len(const std::string &str); // Split a string into a list of strings on a given string // delimiter. NB: delimiters on beginning / end of string are @@ -50,13 +47,14 @@ std::vector split_str(const std::string &s, std::vector split_utf8_str(const std::string &str); // Add a word in index to the dicionary of fst -void add_word_to_fst(const std::vector& word, - fst::StdVectorFst* dictionary); +void add_word_to_fst(const std::vector &word, + fst::StdVectorFst *dictionary); // Add a word in string to dictionary -bool add_word_to_dictionary(const std::string& word, - const std::unordered_map& char_map, - bool add_space, - int SPACE_ID, - fst::StdVectorFst* dictionary); -#endif // DECODER_UTILS_H +bool add_word_to_dictionary( + const std::string &word, + const std::unordered_map &char_map, + bool add_space, + int SPACE_ID, + fst::StdVectorFst *dictionary); +#endif // DECODER_UTILS_H diff --git a/deploy/path_trie.cpp b/deploy/path_trie.cpp index b22f2a471..db0b20cb5 100644 --- a/deploy/path_trie.cpp +++ b/deploy/path_trie.cpp @@ -4,145 +4,142 @@ #include #include -#include "path_trie.h" #include "decoder_utils.h" +#include "path_trie.h" PathTrie::PathTrie() { - _log_prob_b_prev = -NUM_FLT_INF; - _log_prob_nb_prev = -NUM_FLT_INF; - _log_prob_b_cur = -NUM_FLT_INF; - _log_prob_nb_cur = -NUM_FLT_INF; - _score = -NUM_FLT_INF; - - _ROOT = -1; - _character = _ROOT; - _exists = true; - _parent = nullptr; - _dictionary = nullptr; - _dictionary_state = 0; - _has_dictionary = false; - _matcher = nullptr; // finds arcs in FST + log_prob_b_prev = -NUM_FLT_INF; + log_prob_nb_prev = -NUM_FLT_INF; + log_prob_b_cur = -NUM_FLT_INF; + log_prob_nb_cur = -NUM_FLT_INF; + score = -NUM_FLT_INF; + + _ROOT = -1; + character = _ROOT; + _exists = true; + parent = nullptr; + _dictionary = nullptr; + _dictionary_state = 0; + _has_dictionary = false; + _matcher = nullptr; // finds arcs in FST } PathTrie::~PathTrie() { - for (auto child : _children) { - delete child.second; - } + for (auto child : _children) { + delete child.second; + } } PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { - auto child = _children.begin(); - for (child = _children.begin(); child != _children.end(); ++child) { - if (child->first == new_char) { - break; - } + auto child = _children.begin(); + for (child = _children.begin(); child != _children.end(); ++child) { + if (child->first == new_char) { + break; } - if ( child != _children.end() ) { - if (!child->second->_exists) { - child->second->_exists = true; - child->second->_log_prob_b_prev = -NUM_FLT_INF; - child->second->_log_prob_nb_prev = -NUM_FLT_INF; - child->second->_log_prob_b_cur = -NUM_FLT_INF; - child->second->_log_prob_nb_cur = -NUM_FLT_INF; + } + if (child != _children.end()) { + if (!child->second->_exists) { + child->second->_exists = true; + child->second->log_prob_b_prev = -NUM_FLT_INF; + child->second->log_prob_nb_prev = -NUM_FLT_INF; + child->second->log_prob_b_cur = -NUM_FLT_INF; + child->second->log_prob_nb_cur = -NUM_FLT_INF; + } + return (child->second); + } else { + if (_has_dictionary) { + _matcher->SetState(_dictionary_state); + bool found = _matcher->Find(new_char); + if (!found) { + // Adding this 
character causes word outside dictionary + auto FSTZERO = fst::TropicalWeight::Zero(); + auto final_weight = _dictionary->Final(_dictionary_state); + bool is_final = (final_weight != FSTZERO); + if (is_final && reset) { + _dictionary_state = _dictionary->Start(); } - return (child->second); + return nullptr; + } else { + PathTrie* new_path = new PathTrie; + new_path->character = new_char; + new_path->parent = this; + new_path->_dictionary = _dictionary; + new_path->_dictionary_state = _matcher->Value().nextstate; + new_path->_has_dictionary = true; + new_path->_matcher = _matcher; + _children.push_back(std::make_pair(new_char, new_path)); + return new_path; + } } else { - if (_has_dictionary) { - _matcher->SetState(_dictionary_state); - bool found = _matcher->Find(new_char); - if (!found) { - // Adding this character causes word outside dictionary - auto FSTZERO = fst::TropicalWeight::Zero(); - auto final_weight = _dictionary->Final(_dictionary_state); - bool is_final = (final_weight != FSTZERO); - if (is_final && reset) { - _dictionary_state = _dictionary->Start(); - } - return nullptr; - } else { - PathTrie* new_path = new PathTrie; - new_path->_character = new_char; - new_path->_parent = this; - new_path->_dictionary = _dictionary; - new_path->_dictionary_state = _matcher->Value().nextstate; - new_path->_has_dictionary = true; - new_path->_matcher = _matcher; - _children.push_back(std::make_pair(new_char, new_path)); - return new_path; - } - } else { - PathTrie* new_path = new PathTrie; - new_path->_character = new_char; - new_path->_parent = this; - _children.push_back(std::make_pair(new_char, new_path)); - return new_path; - } + PathTrie* new_path = new PathTrie; + new_path->character = new_char; + new_path->parent = this; + _children.push_back(std::make_pair(new_char, new_path)); + return new_path; } + } } PathTrie* PathTrie::get_path_vec(std::vector& output) { - return get_path_vec(output, _ROOT); + return get_path_vec(output, _ROOT); } PathTrie* PathTrie::get_path_vec(std::vector& output, - int stop, - size_t max_steps) { - if (_character == stop || - _character == _ROOT || - output.size() == max_steps) { - std::reverse(output.begin(), output.end()); - return this; - } else { - output.push_back(_character); - return _parent->get_path_vec(output, stop, max_steps); - } + int stop, + size_t max_steps) { + if (character == stop || character == _ROOT || output.size() == max_steps) { + std::reverse(output.begin(), output.end()); + return this; + } else { + output.push_back(character); + return parent->get_path_vec(output, stop, max_steps); + } } -void PathTrie::iterate_to_vec( - std::vector& output) { - if (_exists) { - _log_prob_b_prev = _log_prob_b_cur; - _log_prob_nb_prev = _log_prob_nb_cur; +void PathTrie::iterate_to_vec(std::vector& output) { + if (_exists) { + log_prob_b_prev = log_prob_b_cur; + log_prob_nb_prev = log_prob_nb_cur; - _log_prob_b_cur = -NUM_FLT_INF; - _log_prob_nb_cur = -NUM_FLT_INF; + log_prob_b_cur = -NUM_FLT_INF; + log_prob_nb_cur = -NUM_FLT_INF; - _score = log_sum_exp(_log_prob_b_prev, _log_prob_nb_prev); - output.push_back(this); - } - for (auto child : _children) { - child.second->iterate_to_vec(output); - } + score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev); + output.push_back(this); + } + for (auto child : _children) { + child.second->iterate_to_vec(output); + } } void PathTrie::remove() { - _exists = false; - - if (_children.size() == 0) { - auto child = _parent->_children.begin(); - for (child = _parent->_children.begin(); - child != 
_parent->_children.end(); ++child) { - if (child->first == _character) { - _parent->_children.erase(child); - break; - } - } - - if ( _parent->_children.size() == 0 && !_parent->_exists ) { - _parent->remove(); - } + _exists = false; + + if (_children.size() == 0) { + auto child = parent->_children.begin(); + for (child = parent->_children.begin(); child != parent->_children.end(); + ++child) { + if (child->first == character) { + parent->_children.erase(child); + break; + } + } - delete this; + if (parent->_children.size() == 0 && !parent->_exists) { + parent->remove(); } + + delete this; + } } void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) { - _dictionary = dictionary; - _dictionary_state = dictionary->Start(); - _has_dictionary = true; + _dictionary = dictionary; + _dictionary_state = dictionary->Start(); + _has_dictionary = true; } using FSTMATCH = fst::SortedMatcher; void PathTrie::set_matcher(std::shared_ptr matcher) { - _matcher = matcher; + _matcher = matcher; } diff --git a/deploy/path_trie.h b/deploy/path_trie.h index 7b378e3f9..cac524a3f 100644 --- a/deploy/path_trie.h +++ b/deploy/path_trie.h @@ -1,59 +1,57 @@ #ifndef PATH_TRIE_H #define PATH_TRIE_H #pragma once +#include #include #include #include #include #include -#include using FSTMATCH = fst::SortedMatcher; class PathTrie { public: - PathTrie(); - ~PathTrie(); - - PathTrie* get_path_trie(int new_char, bool reset = true); + PathTrie(); + ~PathTrie(); - PathTrie* get_path_vec(std::vector &output); + PathTrie* get_path_trie(int new_char, bool reset = true); - PathTrie* get_path_vec(std::vector& output, - int stop, - size_t max_steps = std::numeric_limits::max()); + PathTrie* get_path_vec(std::vector& output); - void iterate_to_vec(std::vector &output); + PathTrie* get_path_vec(std::vector& output, + int stop, + size_t max_steps = std::numeric_limits::max()); - void set_dictionary(fst::StdVectorFst* dictionary); + void iterate_to_vec(std::vector& output); - void set_matcher(std::shared_ptr matcher); + void set_dictionary(fst::StdVectorFst* dictionary); - bool is_empty() { - return _ROOT == _character; - } + void set_matcher(std::shared_ptr matcher); - void remove(); + bool is_empty() { return _ROOT == character; } - float _log_prob_b_prev; - float _log_prob_nb_prev; - float _log_prob_b_cur; - float _log_prob_nb_cur; - float _score; - float _approx_ctc; + void remove(); + float log_prob_b_prev; + float log_prob_nb_prev; + float log_prob_b_cur; + float log_prob_nb_cur; + float score; + float approx_ctc; + int character; + PathTrie* parent; - int _ROOT; - int _character; - bool _exists; +private: + int _ROOT; + bool _exists; - PathTrie *_parent; - std::vector > _children; + std::vector> _children; - fst::StdVectorFst* _dictionary; - fst::StdVectorFst::StateId _dictionary_state; - bool _has_dictionary; - std::shared_ptr _matcher; + fst::StdVectorFst* _dictionary; + fst::StdVectorFst::StateId _dictionary_state; + bool _has_dictionary; + std::shared_ptr _matcher; }; -#endif // PATH_TRIE_H +#endif // PATH_TRIE_H diff --git a/deploy/scorer.cpp b/deploy/scorer.cpp index ced71995b..8651eb61f 100644 --- a/deploy/scorer.cpp +++ b/deploy/scorer.cpp @@ -1,219 +1,208 @@ -#include +#include "scorer.h" #include +#include +#include "decoder_utils.h" #include "lm/config.hh" -#include "lm/state.hh" #include "lm/model.hh" -#include "util/tokenize_piece.hh" +#include "lm/state.hh" #include "util/string_piece.hh" -#include "scorer.h" -#include "decoder_utils.h" +#include "util/tokenize_piece.hh" using namespace lm::ngram; 
Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { - this->alpha = alpha; - this->beta = beta; - _is_character_based = true; - _language_model = nullptr; - dictionary = nullptr; - _max_order = 0; - _SPACE_ID = -1; - // load language model - load_LM(lm_path.c_str()); + this->alpha = alpha; + this->beta = beta; + _is_character_based = true; + _language_model = nullptr; + dictionary = nullptr; + _max_order = 0; + _SPACE_ID = -1; + // load language model + load_LM(lm_path.c_str()); } Scorer::~Scorer() { - if (_language_model != nullptr) - delete static_cast(_language_model); - if (dictionary != nullptr) - delete static_cast(dictionary); + if (_language_model != nullptr) + delete static_cast(_language_model); + if (dictionary != nullptr) delete static_cast(dictionary); } void Scorer::load_LM(const char* filename) { - if (access(filename, F_OK) != 0) { - std::cerr << "Invalid language model file !!!" << std::endl; - exit(1); - } - RetriveStrEnumerateVocab enumerate; - lm::ngram::Config config; - config.enumerate_vocab = &enumerate; - _language_model = lm::ngram::LoadVirtual(filename, config); - _max_order = static_cast(_language_model)->Order(); - _vocabulary = enumerate.vocabulary; - for (size_t i = 0; i < _vocabulary.size(); ++i) { - if (_is_character_based - && _vocabulary[i] != UNK_TOKEN - && _vocabulary[i] != START_TOKEN - && _vocabulary[i] != END_TOKEN - && get_utf8_str_len(enumerate.vocabulary[i]) > 1) { - _is_character_based = false; - } + if (access(filename, F_OK) != 0) { + std::cerr << "Invalid language model file !!!" << std::endl; + exit(1); + } + RetriveStrEnumerateVocab enumerate; + lm::ngram::Config config; + config.enumerate_vocab = &enumerate; + _language_model = lm::ngram::LoadVirtual(filename, config); + _max_order = static_cast(_language_model)->Order(); + _vocabulary = enumerate.vocabulary; + for (size_t i = 0; i < _vocabulary.size(); ++i) { + if (_is_character_based && _vocabulary[i] != UNK_TOKEN && + _vocabulary[i] != START_TOKEN && _vocabulary[i] != END_TOKEN && + get_utf8_str_len(enumerate.vocabulary[i]) > 1) { + _is_character_based = false; } + } } double Scorer::get_log_cond_prob(const std::vector& words) { - lm::base::Model* model = static_cast(_language_model); - double cond_prob; - lm::ngram::State state, tmp_state, out_state; - // avoid to inserting in begin - model->NullContextWrite(&state); - for (size_t i = 0; i < words.size(); ++i) { - lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]); - // encounter OOV - if (word_index == 0) { - return OOV_SCORE; - } - cond_prob = model->BaseScore(&state, word_index, &out_state); - tmp_state = state; - state = out_state; - out_state = tmp_state; + lm::base::Model* model = static_cast(_language_model); + double cond_prob; + lm::ngram::State state, tmp_state, out_state; + // avoid to inserting in begin + model->NullContextWrite(&state); + for (size_t i = 0; i < words.size(); ++i) { + lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]); + // encounter OOV + if (word_index == 0) { + return OOV_SCORE; } - // log10 prob - return cond_prob; + cond_prob = model->BaseScore(&state, word_index, &out_state); + tmp_state = state; + state = out_state; + out_state = tmp_state; + } + // log10 prob + return cond_prob; } double Scorer::get_sent_log_prob(const std::vector& words) { - std::vector sentence; - if (words.size() == 0) { - for (size_t i = 0; i < _max_order; ++i) { - sentence.push_back(START_TOKEN); - } - } else { - for (size_t i = 0; i < _max_order - 1; ++i) { - 
sentence.push_back(START_TOKEN); - } - sentence.insert(sentence.end(), words.begin(), words.end()); + std::vector sentence; + if (words.size() == 0) { + for (size_t i = 0; i < _max_order; ++i) { + sentence.push_back(START_TOKEN); } - sentence.push_back(END_TOKEN); - return get_log_prob(sentence); + } else { + for (size_t i = 0; i < _max_order - 1; ++i) { + sentence.push_back(START_TOKEN); + } + sentence.insert(sentence.end(), words.begin(), words.end()); + } + sentence.push_back(END_TOKEN); + return get_log_prob(sentence); } double Scorer::get_log_prob(const std::vector& words) { - assert(words.size() > _max_order); - double score = 0.0; - for (size_t i = 0; i < words.size() - _max_order + 1; ++i) { - std::vector ngram(words.begin() + i, - words.begin() + i + _max_order); - score += get_log_cond_prob(ngram); - } - return score; + assert(words.size() > _max_order); + double score = 0.0; + for (size_t i = 0; i < words.size() - _max_order + 1; ++i) { + std::vector ngram(words.begin() + i, + words.begin() + i + _max_order); + score += get_log_cond_prob(ngram); + } + return score; } void Scorer::reset_params(float alpha, float beta) { - this->alpha = alpha; - this->beta = beta; + this->alpha = alpha; + this->beta = beta; } std::string Scorer::vec2str(const std::vector& input) { - std::string word; - for (auto ind : input) { - word += _char_list[ind]; - } - return word; + std::string word; + for (auto ind : input) { + word += _char_list[ind]; + } + return word; } -std::vector -Scorer::split_labels(const std::vector &labels) { - if (labels.empty()) - return {}; - - std::string s = vec2str(labels); - std::vector words; - if (_is_character_based) { - words = split_utf8_str(s); - } else { - words = split_str(s, " "); - } - return words; +std::vector Scorer::split_labels(const std::vector& labels) { + if (labels.empty()) return {}; + + std::string s = vec2str(labels); + std::vector words; + if (_is_character_based) { + words = split_utf8_str(s); + } else { + words = split_str(s, " "); + } + return words; } void Scorer::set_char_map(std::vector char_list) { - _char_list = char_list; - _char_map.clear(); - - for(unsigned int i = 0; i < _char_list.size(); i++) - { - if (_char_list[i] == " ") { - _SPACE_ID = i; - _char_map[' '] = i; - } else if(_char_list[i].size() == 1){ - _char_map[_char_list[i][0]] = i; - } + _char_list = char_list; + _char_map.clear(); + + for (unsigned int i = 0; i < _char_list.size(); i++) { + if (_char_list[i] == " ") { + _SPACE_ID = i; + _char_map[' '] = i; + } else if (_char_list[i].size() == 1) { + _char_map[_char_list[i][0]] = i; } + } } std::vector Scorer::make_ngram(PathTrie* prefix) { - std::vector ngram; - PathTrie* current_node = prefix; - PathTrie* new_node = nullptr; - - for (int order = 0; order < _max_order; order++) { - std::vector prefix_vec; - - if (_is_character_based) { - new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID, 1); - current_node = new_node; - } else { - new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID); - current_node = new_node->_parent; // Skipping spaces - } - - // reconstruct word - std::string word = vec2str(prefix_vec); - ngram.push_back(word); - - if (new_node->_character == -1) { - // No more spaces, but still need order - for (int i = 0; i < _max_order - order - 1; i++) { - ngram.push_back(START_TOKEN); - } - break; - } - } - std::reverse(ngram.begin(), ngram.end()); - return ngram; -} - -void Scorer::fill_dictionary(bool add_space) { + std::vector ngram; + PathTrie* current_node = prefix; + PathTrie* new_node = nullptr; 
- fst::StdVectorFst dictionary; - // First reverse char_list so ints can be accessed by chars - std::unordered_map char_map; - for (unsigned int i = 0; i < _char_list.size(); i++) { - char_map[_char_list[i]] = i; - } + for (int order = 0; order < _max_order; order++) { + std::vector prefix_vec; - // For each unigram convert to ints and put in trie - int vocab_size = 0; - for (const auto& word : _vocabulary) { - bool added = add_word_to_dictionary(word, - char_map, - add_space, - _SPACE_ID, - &dictionary); - vocab_size += added ? 1 : 0; + if (_is_character_based) { + new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID, 1); + current_node = new_node; + } else { + new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID); + current_node = new_node->parent; // Skipping spaces } - std::cerr << "Vocab Size " << vocab_size << std::endl; - - // Simplify FST - - // This gets rid of "epsilon" transitions in the FST. - // These are transitions that don't require a string input to be taken. - // Getting rid of them is necessary to make the FST determinisitc, but - // can greatly increase the size of the FST - fst::RmEpsilon(&dictionary); - fst::StdVectorFst* new_dict = new fst::StdVectorFst; + // reconstruct word + std::string word = vec2str(prefix_vec); + ngram.push_back(word); - // This makes the FST deterministic, meaning for any string input there's - // only one possible state the FST could be in. It is assumed our - // dictionary is deterministic when using it. - // (lest we'd have to check for multiple transitions at each state) - fst::Determinize(dictionary, new_dict); - - // Finds the simplest equivalent fst. This is unnecessary but decreases - // memory usage of the dictionary - fst::Minimize(new_dict); - this->dictionary = new_dict; + if (new_node->character == -1) { + // No more spaces, but still need order + for (int i = 0; i < _max_order - order - 1; i++) { + ngram.push_back(START_TOKEN); + } + break; + } + } + std::reverse(ngram.begin(), ngram.end()); + return ngram; +} +void Scorer::fill_dictionary(bool add_space) { + fst::StdVectorFst dictionary; + // First reverse char_list so ints can be accessed by chars + std::unordered_map char_map; + for (unsigned int i = 0; i < _char_list.size(); i++) { + char_map[_char_list[i]] = i; + } + + // For each unigram convert to ints and put in trie + int vocab_size = 0; + for (const auto& word : _vocabulary) { + bool added = add_word_to_dictionary( + word, char_map, add_space, _SPACE_ID, &dictionary); + vocab_size += added ? 1 : 0; + } + + std::cerr << "Vocab Size " << vocab_size << std::endl; + + // Simplify FST + + // This gets rid of "epsilon" transitions in the FST. + // These are transitions that don't require a string input to be taken. + // Getting rid of them is necessary to make the FST determinisitc, but + // can greatly increase the size of the FST + fst::RmEpsilon(&dictionary); + fst::StdVectorFst* new_dict = new fst::StdVectorFst; + + // This makes the FST deterministic, meaning for any string input there's + // only one possible state the FST could be in. It is assumed our + // dictionary is deterministic when using it. + // (lest we'd have to check for multiple transitions at each state) + fst::Determinize(dictionary, new_dict); + + // Finds the simplest equivalent fst. 
This is unnecessary but decreases + // memory usage of the dictionary + fst::Minimize(new_dict); + this->dictionary = new_dict; } diff --git a/deploy/scorer.h b/deploy/scorer.h index e3d61a71c..0c78b9870 100644 --- a/deploy/scorer.h +++ b/deploy/scorer.h @@ -1,31 +1,31 @@ #ifndef SCORER_H_ #define SCORER_H_ -#include #include -#include +#include #include +#include #include "lm/enumerate_vocab.hh" -#include "lm/word_index.hh" #include "lm/virtual_interface.hh" -#include "util/string_piece.hh" +#include "lm/word_index.hh" #include "path_trie.h" +#include "util/string_piece.hh" const double OOV_SCORE = -1000.0; const std::string START_TOKEN = ""; const std::string UNK_TOKEN = ""; const std::string END_TOKEN = ""; - // Implement a callback to retrive string vocabulary. +// Implement a callback to retrive string vocabulary. class RetriveStrEnumerateVocab : public lm::EnumerateVocab { public: - RetriveStrEnumerateVocab() {} + RetriveStrEnumerateVocab() {} - void Add(lm::WordIndex index, const StringPiece& str) { - vocabulary.push_back(std::string(str.data(), str.length())); - } + void Add(lm::WordIndex index, const StringPiece& str) { + vocabulary.push_back(std::string(str.data(), str.length())); + } - std::vector vocabulary; + std::vector vocabulary; }; // External scorer to query languange score for n-gram or sentence. @@ -33,59 +33,59 @@ public: // Scorer scorer(alpha, beta, "path_of_language_model"); // scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); // scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); -class Scorer{ +class Scorer { public: - Scorer(double alpha, double beta, const std::string& lm_path); - ~Scorer(); + Scorer(double alpha, double beta, const std::string& lm_path); + ~Scorer(); - double get_log_cond_prob(const std::vector& words); + double get_log_cond_prob(const std::vector& words); - double get_sent_log_prob(const std::vector& words); + double get_sent_log_prob(const std::vector& words); - size_t get_max_order() { return _max_order; } + size_t get_max_order() { return _max_order; } - bool is_char_map_empty() {return _char_map.size() == 0; } + bool is_char_map_empty() { return _char_map.size() == 0; } - bool is_character_based() { return _is_character_based; } + bool is_character_based() { return _is_character_based; } - // reset params alpha & beta - void reset_params(float alpha, float beta); + // reset params alpha & beta + void reset_params(float alpha, float beta); - // make ngram - std::vector make_ngram(PathTrie* prefix); + // make ngram + std::vector make_ngram(PathTrie* prefix); - // fill dictionary for fst - void fill_dictionary(bool add_space); + // fill dictionary for fst + void fill_dictionary(bool add_space); - // set char map - void set_char_map(std::vector char_list); + // set char map + void set_char_map(std::vector char_list); - std::vector split_labels(const std::vector &labels); + std::vector split_labels(const std::vector& labels); - // expose to decoder - double alpha; - double beta; + // expose to decoder + double alpha; + double beta; - // fst dictionary - void* dictionary; + // fst dictionary + void* dictionary; protected: - void load_LM(const char* filename); + void load_LM(const char* filename); - double get_log_prob(const std::vector& words); + double get_log_prob(const std::vector& words); - std::string vec2str(const std::vector &input); + std::string vec2str(const std::vector& input); private: - void* _language_model; - bool _is_character_based; - size_t _max_order; + void* _language_model; + bool _is_character_based; + size_t 
_max_order; - int _SPACE_ID; - std::vector _char_list; - std::unordered_map _char_map; + int _SPACE_ID; + std::vector _char_list; + std::unordered_map _char_map; - std::vector _vocabulary; + std::vector _vocabulary; }; -#endif // SCORER_H_ +#endif // SCORER_H_ From d776ce9bd71d1878bd51c2a795bd4373dd0119fb Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 6 Sep 2017 16:02:22 +0800 Subject: [PATCH 174/335] Fix import errors in unitests. --- models/tests/test_decoders.py | 14 ++++++++------ utils/tests/test_error_rate.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/models/tests/test_decoders.py b/models/tests/test_decoders.py index fa43879b8..acce46af8 100644 --- a/models/tests/test_decoders.py +++ b/models/tests/test_decoders.py @@ -4,7 +4,7 @@ from __future__ import division from __future__ import print_function import unittest -from decoder import * +from models import decoder class TestDecoders(unittest.TestCase): @@ -53,15 +53,17 @@ class TestDecoders(unittest.TestCase): self.beam_search_result = ['acdc', "b'a"] def test_greedy_decoder_1(self): - bst_result = ctc_greedy_decoder(self.probs_seq1, self.vocab_list) + bst_result = decoder.ctc_greedy_decoder(self.probs_seq1, + self.vocab_list) self.assertEqual(bst_result, self.greedy_result[0]) def test_greedy_decoder_2(self): - bst_result = ctc_greedy_decoder(self.probs_seq2, self.vocab_list) + bst_result = decoder.ctc_greedy_decoder(self.probs_seq2, + self.vocab_list) self.assertEqual(bst_result, self.greedy_result[1]) def test_beam_search_decoder_1(self): - beam_result = ctc_beam_search_decoder( + beam_result = decoder.ctc_beam_search_decoder( probs_seq=self.probs_seq1, beam_size=self.beam_size, vocabulary=self.vocab_list, @@ -69,7 +71,7 @@ class TestDecoders(unittest.TestCase): self.assertEqual(beam_result[0][1], self.beam_search_result[0]) def test_beam_search_decoder_2(self): - beam_result = ctc_beam_search_decoder( + beam_result = decoder.ctc_beam_search_decoder( probs_seq=self.probs_seq2, beam_size=self.beam_size, vocabulary=self.vocab_list, @@ -77,7 +79,7 @@ class TestDecoders(unittest.TestCase): self.assertEqual(beam_result[0][1], self.beam_search_result[1]) def test_beam_search_decoder_batch(self): - beam_results = ctc_beam_search_decoder_batch( + beam_results = decoder.ctc_beam_search_decoder_batch( probs_split=[self.probs_seq1, self.probs_seq2], beam_size=self.beam_size, vocabulary=self.vocab_list, diff --git a/utils/tests/test_error_rate.py b/utils/tests/test_error_rate.py index 99e137a9a..d6bc7442e 100644 --- a/utils/tests/test_error_rate.py +++ b/utils/tests/test_error_rate.py @@ -5,7 +5,7 @@ from __future__ import division from __future__ import print_function import unittest -import error_rate +from utils import error_rate class TestParse(unittest.TestCase): From 861b946d7a33670efeb4692aade2a14465ec5649 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 7 Sep 2017 11:26:58 +0800 Subject: [PATCH 175/335] Re-design README.md doc structure and add table of contents. --- README.md | 53 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index db07d8c20..1962c1ccb 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,39 @@ # DeepSpeech2 on PaddlePaddle ->TODO: to be updated, since the directory hierarchy was changed. 
+*DeepSpeech2 on PaddlePaddle* is an open-source implementation of an end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), built on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech-to-text, via an easy-to-use, efficient and scalable integrated implementation, including training & inference modules, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. In addition, several pre-trained models for both English and Mandarin speech are also released. + +## Table of Contents +- [Prerequisites](#prerequisites) +- [Installation](#installation) +- [Getting Started](#getting-started) +- [Data Preparation](#data-preparation) +- [Training a Model](#training-a-model) +- [Inference and Evaluation](#inference-and-evaluation) +- [Distributed Cloud Training](#distributed-cloud-training) +- [Hyper-parameters Tuning](#hyper-parameters-tuning) +- [Trying Live Demo with Your Own Voice](#trying-live-demo-with-your-own-voice) +- [Experiments and Benchmarks](#experiments-and-benchmarks) +- [Questions and Help](#questions-and-help) + +## Prerequisites +- Only Python 2.7 is supported +- The latest version of PaddlePaddle (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation)) ## Installation +Please install the [prerequisites](#prerequisites) above before moving on. + ``` +git clone https://github.com/PaddlePaddle/models.git +cd models/deep_speech_2 sh setup.sh ``` -Please replace `$PADDLE_INSTALL_DIR` with your own paddle installation directory. +## Getting Started -## Usage +TODO -### Preparing Data +## Data Preparation ``` cd datasets @@ -31,7 +52,7 @@ More help for arguments: python datasets/librispeech/librispeech.py --help ``` -### Preparing for Training + ``` python tools/compute_mean_std.py @@ -51,7 +72,7 @@ More help for arguments: python tools/compute_mean_std.py --help ``` -### Training +## Training a Model For GPU Training: @@ -71,7 +92,7 @@ More help for arguments: python train.py --help ``` -### Preparing language model +## Inference and Evaluation The following steps (inference, hyper-parameters tuning and evaluation) require a language model during decoding. A compressed language model is provided and can be accessed by @@ -82,7 +103,7 @@ sh run.sh cd .. ``` -### Inference + For GPU inference @@ -102,7 +123,6 @@ More help for arguments: python infer.py --help ``` -### Evaluating ``` CUDA_VISIBLE_DEVICES=0 python evaluate.py @@ -114,7 +134,7 @@ More help for arguments: python evaluate.py --help ``` -### Parameters tuning +## Hyper-parameters Tuning Usually, the parameters $\alpha$ and $\beta$ for the CTC [prefix beam search](https://arxiv.org/abs/1408.2873) decoder need to be tuned after retraining the acoustic model. @@ -138,7 +158,12 @@ python tune.py --help Then reset the parameters with the tuning results before running inference or evaluation. -### Playing with the ASR Demo +## Distributed Cloud Training + +If you wish to train DeepSpeech2 on PaddleCloud, please refer to +[Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). + +## Trying Live Demo with Your Own Voice A real-time ASR demo is built for users to try out the ASR model with their own voice. Please do the following installation on the machine where you'd like to run the demo's client (no need for the machine running the demo's server). 
@@ -163,8 +188,6 @@ On the client console, press and hold the "white-space" key on the keyboard to s It is also possible to start the server and the client on two separate machines, e.g. `demo_client.py` is usually started on a machine with microphone hardware, while `demo_server.py` is usually started on a remote server with powerful GPUs. Please first make sure that these two machines have network access to each other, and then use `--host_ip` and `--host_port` to indicate the server machine's actual IP address (instead of the default `localhost`) and TCP port, in both `demo_server.py` and `demo_client.py`. +## Experiments and Benchmarks -## PaddleCloud Training - -If you wish to train DeepSpeech2 on PaddleCloud, please refer to -[Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). +## Questions and Help From 5d68ba1a73fe8f16102311141bd150811c7c8537 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 7 Sep 2017 20:20:23 +0800 Subject: [PATCH 176/335] fix missing import --- models/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models/model.py b/models/model.py index 93c4c41bf..c072a5f69 100644 --- a/models/model.py +++ b/models/model.py @@ -10,6 +10,7 @@ import gzip import paddle.v2 as paddle from lm.lm_scorer import LmScorer from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder +from models.decoder import ctc_beam_search_decoder_batch from models.network import deep_speech_v2_network From 69e0d86ddb9661b701979c45d3d121dd51496533 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 7 Sep 2017 21:24:10 +0800 Subject: [PATCH 177/335] fix wrong path in prepare.sh --- examples/librispeech/prepare_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/librispeech/prepare_data.sh b/examples/librispeech/prepare_data.sh index 162a38c49..10b85d058 100644 --- a/examples/librispeech/prepare_data.sh +++ b/examples/librispeech/prepare_data.sh @@ -6,7 +6,7 @@ pushd ../.. python data/librispeech/librispeech.py \ --manifest_prefix='data/librispeech/manifest' \ --full_download='True' \ ---target_dir='~/.cache/paddle/dataset/speech/Libri' +--target_dir=$HOME'/.cache/paddle/dataset/speech/Libri' if [ $? -ne 0 ]; then echo "Prepare LibriSpeech failed. Terminated." 
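The `--target_dir` change in the patch above works because a tilde inside single quotes is never expanded by the shell: the literal string `~/.cache/...` reaches the Python script, which then operates on a directory literally named `~` unless it calls `os.path.expanduser` itself. `$HOME`, written outside the quotes, is expanded before the argument is assembled. A minimal sketch of the difference, using plain `echo` (the printed home directory is only an example value, not part of the patch):

```shell
# The quoted tilde is passed through literally; the downstream script sees "~/..."
echo --target_dir='~/.cache/paddle/dataset/speech/Libri'
# prints: --target_dir=~/.cache/paddle/dataset/speech/Libri

# $HOME sits outside the single quotes, so the shell expands it first
echo --target_dir=$HOME'/.cache/paddle/dataset/speech/Libri'
# prints, e.g.: --target_dir=/home/user/.cache/paddle/dataset/speech/Libri
```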
From 7d0458c7f759574c9f6447538a7fafeaa3e8bb94 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 8 Sep 2017 15:20:23 +0800 Subject: [PATCH 178/335] adapt to the new folder structure of DS2 --- examples/librispeech/generate.sh | 6 +++--- examples/librispeech/run_test.sh | 8 ++++---- infer.py | 4 +++- models/model.py | 12 ++++++++---- {deploy => models/swig_decoders}/README.md | 0 {deploy => models/swig_decoders}/__init__.py | 0 {deploy => models/swig_decoders}/_init_paths.py | 0 {deploy => models/swig_decoders}/ctc_decoders.cpp | 4 ++-- {deploy => models/swig_decoders}/ctc_decoders.h | 2 +- {deploy => models/swig_decoders}/decoder_utils.cpp | 0 {deploy => models/swig_decoders}/decoder_utils.h | 0 {deploy => models/swig_decoders}/decoders.i | 0 {deploy => models/swig_decoders}/path_trie.cpp | 0 {deploy => models/swig_decoders}/path_trie.h | 0 {deploy => models/swig_decoders}/scorer.cpp | 0 {deploy => models/swig_decoders}/scorer.h | 0 {deploy => models/swig_decoders}/setup.py | 0 {deploy => models}/swig_decoders_wrapper.py | 4 ++-- test.py | 3 ++- 19 files changed, 25 insertions(+), 18 deletions(-) rename {deploy => models/swig_decoders}/README.md (100%) rename {deploy => models/swig_decoders}/__init__.py (100%) rename {deploy => models/swig_decoders}/_init_paths.py (100%) rename {deploy => models/swig_decoders}/ctc_decoders.cpp (98%) rename {deploy => models/swig_decoders}/ctc_decoders.h (96%) rename {deploy => models/swig_decoders}/decoder_utils.cpp (100%) rename {deploy => models/swig_decoders}/decoder_utils.h (100%) rename {deploy => models/swig_decoders}/decoders.i (100%) rename {deploy => models/swig_decoders}/path_trie.cpp (100%) rename {deploy => models/swig_decoders}/path_trie.h (100%) rename {deploy => models/swig_decoders}/scorer.cpp (100%) rename {deploy => models/swig_decoders}/scorer.h (100%) rename {deploy => models/swig_decoders}/setup.py (100%) rename {deploy => models}/swig_decoders_wrapper.py (97%) diff --git a/examples/librispeech/generate.sh b/examples/librispeech/generate.sh index a34b7bc10..752aafb6a 100644 --- a/examples/librispeech/generate.sh +++ b/examples/librispeech/generate.sh @@ -12,9 +12,9 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index 5a14cb682..350db8f02 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -3,7 +3,7 @@ pushd ../.. 
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u evaluate.py \ +python -u test.py \ --batch_size=128 \ --trainer_count=8 \ --beam_size=500 \ @@ -12,9 +12,9 @@ python -u evaluate.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/infer.py b/infer.py index 1ce969ae0..44ee93581 100644 --- a/infer.py +++ b/infer.py @@ -84,6 +84,8 @@ def infer(): use_gru=args.use_gru, pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] result_transcripts = ds2_model.infer_batch( infer_data=infer_data, decoding_method=args.decoding_method, @@ -91,7 +93,7 @@ def infer(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) diff --git a/models/model.py b/models/model.py index 93c4c41bf..b239d5f39 100644 --- a/models/model.py +++ b/models/model.py @@ -8,8 +8,9 @@ import os import time import gzip import paddle.v2 as paddle -from lm.lm_scorer import LmScorer -from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder +from models.swig_decoders_wrapper import Scorer +from models.swig_decoders_wrapper import ctc_greedy_decoder +from models.swig_decoders_wrapper import ctc_beam_search_decoder_batch from models.network import deep_speech_v2_network @@ -199,9 +200,12 @@ class DeepSpeech2Model(object): elif decoding_method == "ctc_beam_search": # initialize external scorer if self._ext_scorer == None: - self._ext_scorer = LmScorer(beam_alpha, beam_beta, - language_model_path) + self._ext_scorer = Scorer(beam_alpha, beam_beta, + language_model_path) self._loaded_lm_path = language_model_path + self._ext_scorer.set_char_map(vocab_list) + if (not self._ext_scorer.is_character_based()): + self._ext_scorer.fill_dictionary(True) else: self._ext_scorer.reset_params(beam_alpha, beam_beta) assert self._loaded_lm_path == language_model_path diff --git a/deploy/README.md b/models/swig_decoders/README.md similarity index 100% rename from deploy/README.md rename to models/swig_decoders/README.md diff --git a/deploy/__init__.py b/models/swig_decoders/__init__.py similarity index 100% rename from deploy/__init__.py rename to models/swig_decoders/__init__.py diff --git a/deploy/_init_paths.py b/models/swig_decoders/_init_paths.py similarity index 100% rename from deploy/_init_paths.py rename to models/swig_decoders/_init_paths.py diff --git a/deploy/ctc_decoders.cpp b/models/swig_decoders/ctc_decoders.cpp similarity index 98% rename from deploy/ctc_decoders.cpp rename to models/swig_decoders/ctc_decoders.cpp index cedb943ea..e60e66965 100644 --- a/deploy/ctc_decoders.cpp +++ b/models/swig_decoders/ctc_decoders.cpp @@ -10,8 +10,8 @@ #include "fst/fstlib.h" #include "path_trie.h" -std::string ctc_best_path_decoder(std::vector> probs_seq, - std::vector vocabulary) { +std::string ctc_greedy_decoder(std::vector> probs_seq, + std::vector vocabulary) { // dimension check int num_time_steps = probs_seq.size(); for (int i = 0; i < num_time_steps; i++) { diff --git a/deploy/ctc_decoders.h b/models/swig_decoders/ctc_decoders.h similarity index 96% rename from deploy/ctc_decoders.h rename to models/swig_decoders/ctc_decoders.h index 78edefb77..a0028a324 100644 --- 
a/deploy/ctc_decoders.h +++ b/models/swig_decoders/ctc_decoders.h @@ -16,7 +16,7 @@ * A vector that each element is a pair of score and decoding result, * in desending order. */ -std::string ctc_best_path_decoder(std::vector> probs_seq, +std::string ctc_greedy_decoder(std::vector> probs_seq, std::vector vocabulary); /* CTC Beam Search Decoder diff --git a/deploy/decoder_utils.cpp b/models/swig_decoders/decoder_utils.cpp similarity index 100% rename from deploy/decoder_utils.cpp rename to models/swig_decoders/decoder_utils.cpp diff --git a/deploy/decoder_utils.h b/models/swig_decoders/decoder_utils.h similarity index 100% rename from deploy/decoder_utils.h rename to models/swig_decoders/decoder_utils.h diff --git a/deploy/decoders.i b/models/swig_decoders/decoders.i similarity index 100% rename from deploy/decoders.i rename to models/swig_decoders/decoders.i diff --git a/deploy/path_trie.cpp b/models/swig_decoders/path_trie.cpp similarity index 100% rename from deploy/path_trie.cpp rename to models/swig_decoders/path_trie.cpp diff --git a/deploy/path_trie.h b/models/swig_decoders/path_trie.h similarity index 100% rename from deploy/path_trie.h rename to models/swig_decoders/path_trie.h diff --git a/deploy/scorer.cpp b/models/swig_decoders/scorer.cpp similarity index 100% rename from deploy/scorer.cpp rename to models/swig_decoders/scorer.cpp diff --git a/deploy/scorer.h b/models/swig_decoders/scorer.h similarity index 100% rename from deploy/scorer.h rename to models/swig_decoders/scorer.h diff --git a/deploy/setup.py b/models/swig_decoders/setup.py similarity index 100% rename from deploy/setup.py rename to models/swig_decoders/setup.py diff --git a/deploy/swig_decoders_wrapper.py b/models/swig_decoders_wrapper.py similarity index 97% rename from deploy/swig_decoders_wrapper.py rename to models/swig_decoders_wrapper.py index b44fae0ae..202440bfb 100644 --- a/deploy/swig_decoders_wrapper.py +++ b/models/swig_decoders_wrapper.py @@ -23,7 +23,7 @@ class Scorer(swig_decoders.Scorer): swig_decoders.Scorer.__init__(self, alpha, beta, model_path) -def ctc_best_path_decoder(probs_seq, vocabulary): +def ctc_greedy_decoder(probs_seq, vocabulary): """Wrapper for ctc best path decoder in swig. :param probs_seq: 2-D list of probability distributions over each time @@ -35,7 +35,7 @@ def ctc_best_path_decoder(probs_seq, vocabulary): :return: Decoding result string. 
:rtype: basestring """ - return swig_decoders.ctc_best_path_decoder(probs_seq.tolist(), vocabulary) + return swig_decoders.ctc_greedy_decoder(probs_seq.tolist(), vocabulary) def ctc_beam_search_decoder(probs_seq, diff --git a/test.py b/test.py index 747e40df8..ec5d17f30 100644 --- a/test.py +++ b/test.py @@ -85,6 +85,7 @@ def evaluate(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): @@ -95,7 +96,7 @@ def evaluate(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) target_transcripts = [ From f3f5dad80c178f48e4a18eda414ad16a2e6b56b0 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 8 Sep 2017 17:27:56 +0800 Subject: [PATCH 179/335] format header includes & update setup info --- README.md | 10 ++ deploy.py | 238 ------------------------- models/swig_decoders/README.md | 57 ------ models/swig_decoders/ctc_decoders.cpp | 18 +- models/swig_decoders/ctc_decoders.h | 15 +- models/swig_decoders/decoder_utils.cpp | 1 + models/swig_decoders/path_trie.cpp | 3 +- models/swig_decoders/path_trie.h | 4 +- models/swig_decoders/scorer.cpp | 7 +- models/swig_decoders/scorer.h | 18 +- models/swig_decoders/setup.sh | 21 +++ 11 files changed, 71 insertions(+), 321 deletions(-) delete mode 100644 deploy.py delete mode 100644 models/swig_decoders/README.md create mode 100644 models/swig_decoders/setup.sh diff --git a/README.md b/README.md index db07d8c20..2cc12690b 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,16 @@ sh run.sh cd .. ``` +### Setup decoders + +```shell +cd models/swig_decoders +sh setup.sh +cd ../.. +``` + +These commands will install the decoders that translate the ouptut probability vectors of DS2 model to text data, incuding CTC greedy decoder, CTC beam search decoder and its batch version. + ### Inference For GPU inference diff --git a/deploy.py b/deploy.py deleted file mode 100644 index 11972f5f7..000000000 --- a/deploy.py +++ /dev/null @@ -1,238 +0,0 @@ -"""Deployment for DeepSpeech2 model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import gzip -import distutils.util -import multiprocessing -import paddle.v2 as paddle -from data_utils.data import DataGenerator -from layer import deep_speech2 -from deploy.swig_decoders_wrapper import * -from error_rate import wer -import utils -import time - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--num_samples", - default=10, - type=int, - help="Number of samples for inference. (default: %(default)s)") -parser.add_argument( - "--num_conv_layers", - default=2, - type=int, - help="Convolution layer number. (default: %(default)s)") -parser.add_argument( - "--num_rnn_layers", - default=3, - type=int, - help="RNN layer number. (default: %(default)s)") -parser.add_argument( - "--rnn_layer_size", - default=512, - type=int, - help="RNN layer cell number. (default: %(default)s)") -parser.add_argument( - "--use_gpu", - default=True, - type=distutils.util.strtobool, - help="Use gpu or not. 
(default: %(default)s)") -parser.add_argument( - "--num_threads_data", - default=multiprocessing.cpu_count(), - type=int, - help="Number of cpu threads for preprocessing data. (default: %(default)s)") -parser.add_argument( - "--num_processes_beam_search", - default=multiprocessing.cpu_count(), - type=int, - help="Number of cpu processes for beam search. (default: %(default)s)") -parser.add_argument( - "--mean_std_filepath", - default='mean_std.npz', - type=str, - help="Manifest path for normalizer. (default: %(default)s)") -parser.add_argument( - "--decode_manifest_path", - default='datasets/manifest.test', - type=str, - help="Manifest path for decoding. (default: %(default)s)") -parser.add_argument( - "--model_filepath", - default='checkpoints/params.latest.tar.gz', - type=str, - help="Model filepath. (default: %(default)s)") -parser.add_argument( - "--vocab_filepath", - default='datasets/vocab/eng_vocab.txt', - type=str, - help="Vocabulary filepath. (default: %(default)s)") -parser.add_argument( - "--decode_method", - default='beam_search', - type=str, - help="Method for ctc decoding: beam_search or beam_search_batch. " - "(default: %(default)s)") -parser.add_argument( - "--beam_size", - default=500, - type=int, - help="Width for beam search decoding. (default: %(default)d)") -parser.add_argument( - "--num_results_per_sample", - default=1, - type=int, - help="Number of output per sample in beam search. (default: %(default)d)") -parser.add_argument( - "--language_model_path", - default="lm/data/common_crawl_00.prune01111.trie.klm", - type=str, - help="Path for language model. (default: %(default)s)") -parser.add_argument( - "--alpha", - default=1.5, - type=float, - help="Parameter associated with language model. (default: %(default)f)") -parser.add_argument( - "--beta", - default=0.3, - type=float, - help="Parameter associated with word count. (default: %(default)f)") -parser.add_argument( - "--cutoff_prob", - default=1.0, - type=float, - help="The cutoff probability of pruning" - "in beam search. (default: %(default)f)") -parser.add_argument( - "--cutoff_top_n", - default=40, - type=int, - help="The cutoff number of pruning" - "in beam search. (default: %(default)f)") -args = parser.parse_args() - - -def infer(): - """Deployment for DeepSpeech2.""" - # initialize data generator - data_generator = DataGenerator( - vocab_filepath=args.vocab_filepath, - mean_std_filepath=args.mean_std_filepath, - augmentation_config='{}', - num_threads=args.num_threads_data) - - # create network config - # paddle.data_type.dense_array is used for variable batch input. - # The size 161 * 161 is only an placeholder value and the real shape - # of input batch data will be induced during training. 
- audio_data = paddle.layer.data( - name="audio_spectrogram", type=paddle.data_type.dense_array(161 * 161)) - text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) - output_probs, _ = deep_speech2( - audio_data=audio_data, - text_data=text_data, - dict_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size) - - # load parameters - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(args.model_filepath)) - - # prepare infer data - batch_reader = data_generator.batch_reader_creator( - manifest_path=args.decode_manifest_path, - batch_size=args.num_samples, - min_batch_size=1, - sortagrad=False, - shuffle_method=None) - infer_data = batch_reader().next() - - # run inference - inferer = paddle.inference.Inference( - output_layer=output_probs, parameters=parameters) - infer_results = inferer.infer(input=infer_data) - - num_steps = len(infer_results) // len(infer_data) - probs_split = [ - infer_results[i * num_steps:(i + 1) * num_steps] - for i in xrange(len(infer_data)) - ] - - # targe transcription - target_transcription = [ - ''.join( - [data_generator.vocab_list[index] for index in infer_data[i][1]]) - for i, probs in enumerate(probs_split) - ] - - # external scorer - ext_scorer = Scorer( - alpha=args.alpha, beta=args.beta, model_path=args.language_model_path) - - # from unicode to string - vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] - - # The below two steps, i.e. setting char map and filling dictionary of - # FST will be completed implicitly when ext_scorer first used.But to save - # the time of decoding the first audio sample, they are done in advance. 
- ext_scorer.set_char_map(vocab_list) - # only for ward based language model - ext_scorer.fill_dictionary(True) - - # for word error rate metric - wer_sum, wer_counter = 0.0, 0 - - ## decode and print - time_begin = time.time() - batch_beam_results = [] - if args.decode_method == 'beam_search': - for i, probs in enumerate(probs_split): - beam_result = ctc_beam_search_decoder( - probs_seq=probs, - beam_size=args.beam_size, - vocabulary=vocab_list, - blank_id=len(vocab_list), - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - ext_scoring_func=ext_scorer, ) - batch_beam_results += [beam_result] - else: - batch_beam_results = ctc_beam_search_decoder_batch( - probs_split=probs_split, - beam_size=args.beam_size, - vocabulary=vocab_list, - blank_id=len(vocab_list), - num_processes=args.num_processes_beam_search, - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - ext_scoring_func=ext_scorer, ) - - for i, beam_result in enumerate(batch_beam_results): - print("\nTarget Transcription:\t%s" % target_transcription[i]) - print("Beam %d: %f \t%s" % (0, beam_result[0][0], beam_result[0][1])) - wer_cur = wer(target_transcription[i], beam_result[0][1]) - wer_sum += wer_cur - wer_counter += 1 - print("cur wer = %f , average wer = %f" % - (wer_cur, wer_sum / wer_counter)) - - print("time for decoding = %f" % (time.time() - time_begin)) - - -def main(): - utils.print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=1) - infer() - - -if __name__ == '__main__': - main() diff --git a/models/swig_decoders/README.md b/models/swig_decoders/README.md deleted file mode 100644 index e817be105..000000000 --- a/models/swig_decoders/README.md +++ /dev/null @@ -1,57 +0,0 @@ - -The decoders for deployment developed in C++ are a better alternative for the prototype decoders in Pytthon, with more powerful performance in both speed and accuracy. - -### Installation - -The build depends on several open-sourced projects, first clone or download them to current directory (i.e., `deep_speech_2/deploy`) - -- [**KenLM**](https://github.com/kpu/kenlm/): Faster and Smaller Language Model Queries - -```shell -git clone https://github.com/kpu/kenlm.git -``` - -- [**OpenFst**](http://www.openfst.org/twiki/bin/view/FST/WebHome): A library for finite-state transducers - -```shell -wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz -tar -xzvf openfst-1.6.3.tar.gz -``` - - -- [**ThreadPool**](http://progsch.net/wordpress/): A library for C++ thread pool - -```shell -git clone https://github.com/progschj/ThreadPool.git -``` - -- [**SWIG**](http://www.swig.org): A tool that provides the Python interface for the decoders, please make sure it being installed. - -Then run the setup - -```shell -python setup.py install --num_processes 4 -cd .. -``` - -### Usage - -The decoders for deployment share almost the same interface with the prototye decoders in Python. After the installation succeeds, these decoders are very convenient for call in Python, and a complete example in ```deploy.py``` can be refered. 
- -For GPU deployment - -``` -CUDA_VISIBLE_DEVICES=0 python deploy.py -``` - -For CPU deployment - -``` -python deploy.py --use_gpu=False -``` - -More help for arguments - -``` -python deploy.py --help -``` diff --git a/models/swig_decoders/ctc_decoders.cpp b/models/swig_decoders/ctc_decoders.cpp index e60e66965..4c9a45d9e 100644 --- a/models/swig_decoders/ctc_decoders.cpp +++ b/models/swig_decoders/ctc_decoders.cpp @@ -1,17 +1,21 @@ #include "ctc_decoders.h" + #include #include #include #include #include #include + +#include "fst/fstlib.h" #include "ThreadPool.h" + #include "decoder_utils.h" -#include "fst/fstlib.h" #include "path_trie.h" -std::string ctc_greedy_decoder(std::vector> probs_seq, - std::vector vocabulary) { +std::string ctc_greedy_decoder( + const std::vector>& probs_seq, + const std::vector& vocabulary) { // dimension check int num_time_steps = probs_seq.size(); for (int i = 0; i < num_time_steps; i++) { @@ -56,7 +60,7 @@ std::string ctc_greedy_decoder(std::vector> probs_seq, } std::vector> ctc_beam_search_decoder( - std::vector> probs_seq, + const std::vector>& probs_seq, int beam_size, std::vector vocabulary, int blank_id, @@ -64,7 +68,7 @@ std::vector> ctc_beam_search_decoder( int cutoff_top_n, Scorer *extscorer) { // dimension check - int num_time_steps = probs_seq.size(); + size_t num_time_steps = probs_seq.size(); for (int i = 0; i < num_time_steps; i++) { if (probs_seq[i].size() != vocabulary.size() + 1) { std::cout << " The shape of probs_seq does not match" @@ -278,9 +282,9 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( - std::vector>> probs_split, + const std::vector>>& probs_split, int beam_size, - std::vector vocabulary, + const std::vector& vocabulary, int blank_id, int num_processes, double cutoff_prob, diff --git a/models/swig_decoders/ctc_decoders.h b/models/swig_decoders/ctc_decoders.h index a0028a324..5b4bb7932 100644 --- a/models/swig_decoders/ctc_decoders.h +++ b/models/swig_decoders/ctc_decoders.h @@ -4,6 +4,7 @@ #include #include #include + #include "scorer.h" /* CTC Best Path Decoder @@ -16,8 +17,9 @@ * A vector that each element is a pair of score and decoding result, * in desending order. */ -std::string ctc_greedy_decoder(std::vector> probs_seq, - std::vector vocabulary); +std::string ctc_greedy_decoder( + const std::vector>& probs_seq, + const std::vector& vocabulary); /* CTC Beam Search Decoder @@ -35,7 +37,7 @@ std::string ctc_greedy_decoder(std::vector> probs_seq, * in desending order. */ std::vector> ctc_beam_search_decoder( - std::vector> probs_seq, + const std::vector>& probs_seq, int beam_size, std::vector vocabulary, int blank_id, @@ -43,8 +45,7 @@ std::vector> ctc_beam_search_decoder( int cutoff_top_n = 40, Scorer *ext_scorer = NULL); -/* CTC Beam Search Decoder for batch data, the interface is consistent with the - * original decoder in Python version. 
+/* CTC Beam Search Decoder for batch data * Parameters: * probs_seq: 3-D vector that each element is a 2-D vector that can be used @@ -63,9 +64,9 @@ std::vector> ctc_beam_search_decoder( */ std::vector>> ctc_beam_search_decoder_batch( - std::vector>> probs_split, + const std::vector>>& probs_split, int beam_size, - std::vector vocabulary, + const std::vector& vocabulary, int blank_id, int num_processes, double cutoff_prob = 1.0, diff --git a/models/swig_decoders/decoder_utils.cpp b/models/swig_decoders/decoder_utils.cpp index bed0f623f..d25c4deb4 100644 --- a/models/swig_decoders/decoder_utils.cpp +++ b/models/swig_decoders/decoder_utils.cpp @@ -1,4 +1,5 @@ #include "decoder_utils.h" + #include #include #include diff --git a/models/swig_decoders/path_trie.cpp b/models/swig_decoders/path_trie.cpp index db0b20cb5..9e68c0f15 100644 --- a/models/swig_decoders/path_trie.cpp +++ b/models/swig_decoders/path_trie.cpp @@ -1,3 +1,5 @@ +#include "path_trie.h" + #include #include #include @@ -5,7 +7,6 @@ #include #include "decoder_utils.h" -#include "path_trie.h" PathTrie::PathTrie() { log_prob_b_prev = -NUM_FLT_INF; diff --git a/models/swig_decoders/path_trie.h b/models/swig_decoders/path_trie.h index cac524a3f..e581ca73c 100644 --- a/models/swig_decoders/path_trie.h +++ b/models/swig_decoders/path_trie.h @@ -1,12 +1,12 @@ #ifndef PATH_TRIE_H #define PATH_TRIE_H #pragma once -#include #include #include #include #include #include +#include using FSTMATCH = fst::SortedMatcher; @@ -45,12 +45,12 @@ public: private: int _ROOT; bool _exists; + bool _has_dictionary; std::vector> _children; fst::StdVectorFst* _dictionary; fst::StdVectorFst::StateId _dictionary_state; - bool _has_dictionary; std::shared_ptr _matcher; }; diff --git a/models/swig_decoders/scorer.cpp b/models/swig_decoders/scorer.cpp index 8651eb61f..a713b0dff 100644 --- a/models/swig_decoders/scorer.cpp +++ b/models/swig_decoders/scorer.cpp @@ -1,13 +1,16 @@ #include "scorer.h" + #include #include -#include "decoder_utils.h" + #include "lm/config.hh" #include "lm/model.hh" #include "lm/state.hh" #include "util/string_piece.hh" #include "util/tokenize_piece.hh" +#include "decoder_utils.h" + using namespace lm::ngram; Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { @@ -122,7 +125,7 @@ std::vector Scorer::split_labels(const std::vector& labels) { return words; } -void Scorer::set_char_map(std::vector char_list) { +void Scorer::set_char_map(const std::vector& char_list) { _char_list = char_list; _char_map.clear(); diff --git a/models/swig_decoders/scorer.h b/models/swig_decoders/scorer.h index 0c78b9870..b99a99b72 100644 --- a/models/swig_decoders/scorer.h +++ b/models/swig_decoders/scorer.h @@ -5,12 +5,14 @@ #include #include #include + #include "lm/enumerate_vocab.hh" #include "lm/virtual_interface.hh" #include "lm/word_index.hh" -#include "path_trie.h" #include "util/string_piece.hh" +#include "path_trie.h" + const double OOV_SCORE = -1000.0; const std::string START_TOKEN = ""; const std::string UNK_TOKEN = ""; @@ -28,11 +30,13 @@ public: std::vector vocabulary; }; -// External scorer to query languange score for n-gram or sentence. -// Example: -// Scorer scorer(alpha, beta, "path_of_language_model"); -// scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); -// scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); +/* External scorer to query languange score for n-gram or sentence. 
+ * + * Example: + * Scorer scorer(alpha, beta, "path_of_language_model"); + * scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" }); + * scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" }); + */ class Scorer { public: Scorer(double alpha, double beta, const std::string& lm_path); @@ -58,7 +62,7 @@ public: void fill_dictionary(bool add_space); // set char map - void set_char_map(std::vector char_list); + void set_char_map(const std::vector& char_list); std::vector split_labels(const std::vector& labels); diff --git a/models/swig_decoders/setup.sh b/models/swig_decoders/setup.sh new file mode 100644 index 000000000..069f51d6e --- /dev/null +++ b/models/swig_decoders/setup.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +if [ ! -d kenlm ]; then + git clone https://github.com/luotao1/kenlm.git + echo -e "\n" +fi + +if [ ! -d openfst-1.6.3 ]; then + echo "Download and extract openfst ..." + wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz + tar -xzvf openfst-1.6.3.tar.gz + echo -e "\n" +fi + +if [ ! -d ThreadPool ]; then + git clone https://github.com/progschj/ThreadPool.git + echo -e "\n" +fi + +echo "Install decoders ..." +python setup.py install --num_processes 4 From adab01bbf6d83093881e2279c5ce031c0ef1361d Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 8 Sep 2017 20:35:25 +0800 Subject: [PATCH 180/335] append some comments --- models/swig_decoders/ctc_decoders.cpp | 15 +++++++------- models/swig_decoders/ctc_decoders.h | 17 ++++++++-------- models/swig_decoders/decoder_utils.cpp | 24 +++++++++++------------ models/swig_decoders/decoder_utils.h | 19 +++++++++++------- models/swig_decoders/path_trie.cpp | 2 +- models/swig_decoders/path_trie.h | 11 +++++++++++ models/swig_decoders/scorer.cpp | 27 ++++++++++++++------------ models/swig_decoders/scorer.h | 27 +++++++++++++------------- 8 files changed, 80 insertions(+), 62 deletions(-) diff --git a/models/swig_decoders/ctc_decoders.cpp b/models/swig_decoders/ctc_decoders.cpp index 4c9a45d9e..109799129 100644 --- a/models/swig_decoders/ctc_decoders.cpp +++ b/models/swig_decoders/ctc_decoders.cpp @@ -14,8 +14,8 @@ #include "path_trie.h" std::string ctc_greedy_decoder( - const std::vector>& probs_seq, - const std::vector& vocabulary) { + const std::vector> &probs_seq, + const std::vector &vocabulary) { // dimension check int num_time_steps = probs_seq.size(); for (int i = 0; i < num_time_steps; i++) { @@ -60,7 +60,7 @@ std::string ctc_greedy_decoder( } std::vector> ctc_beam_search_decoder( - const std::vector>& probs_seq, + const std::vector> &probs_seq, int beam_size, std::vector vocabulary, int blank_id, @@ -104,7 +104,7 @@ std::vector> ctc_beam_search_decoder( } if (!extscorer->is_character_based()) { if (extscorer->dictionary == nullptr) { - // fill dictionary for fst + // fill dictionary for fst with space extscorer->fill_dictionary(true); } auto fst_dict = static_cast(extscorer->dictionary); @@ -282,9 +282,9 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( - const std::vector>>& probs_split, + const std::vector>> &probs_split, int beam_size, - const std::vector& vocabulary, + const std::vector &vocabulary, int blank_id, int num_processes, double cutoff_prob, @@ -304,8 +304,7 @@ ctc_beam_search_decoder_batch( if (extscorer->is_char_map_empty()) { extscorer->set_char_map(vocabulary); } - if (!extscorer->is_character_based() && - extscorer->dictionary == nullptr) { + if (!extscorer->is_character_based() && extscorer->dictionary == nullptr) { // init dictionary 
extscorer->fill_dictionary(true); } diff --git a/models/swig_decoders/ctc_decoders.h b/models/swig_decoders/ctc_decoders.h index 5b4bb7932..b8c512bda 100644 --- a/models/swig_decoders/ctc_decoders.h +++ b/models/swig_decoders/ctc_decoders.h @@ -14,12 +14,11 @@ * over vocabulary of one time step. * vocabulary: A vector of vocabulary. * Return: - * A vector that each element is a pair of score and decoding result, - * in desending order. + * The decoding result in string */ std::string ctc_greedy_decoder( - const std::vector>& probs_seq, - const std::vector& vocabulary); + const std::vector> &probs_seq, + const std::vector &vocabulary); /* CTC Beam Search Decoder @@ -37,7 +36,7 @@ std::string ctc_greedy_decoder( * in desending order. */ std::vector> ctc_beam_search_decoder( - const std::vector>& probs_seq, + const std::vector> &probs_seq, int beam_size, std::vector vocabulary, int blank_id, @@ -59,14 +58,14 @@ std::vector> ctc_beam_search_decoder( * cutoff_top_n: Cutoff number for pruning. * ext_scorer: External scorer to evaluate a prefix. * Return: - * A 2-D vector that each element is a vector of decoding result for one - * sample. + * A 2-D vector that each element is a vector of beam search decoding + * result for one audio sample. */ std::vector>> ctc_beam_search_decoder_batch( - const std::vector>>& probs_split, + const std::vector>> &probs_split, int beam_size, - const std::vector& vocabulary, + const std::vector &vocabulary, int blank_id, int num_processes, double cutoff_prob = 1.0, diff --git a/models/swig_decoders/decoder_utils.cpp b/models/swig_decoders/decoder_utils.cpp index d25c4deb4..989b067e7 100644 --- a/models/swig_decoders/decoder_utils.cpp +++ b/models/swig_decoders/decoder_utils.cpp @@ -4,7 +4,7 @@ #include #include -size_t get_utf8_str_len(const std::string& str) { +size_t get_utf8_str_len(const std::string &str) { size_t str_len = 0; for (char c : str) { str_len += ((c & 0xc0) != 0x80); @@ -12,7 +12,7 @@ size_t get_utf8_str_len(const std::string& str) { return str_len; } -std::vector split_utf8_str(const std::string& str) { +std::vector split_utf8_str(const std::string &str) { std::vector result; std::string out_str; @@ -31,8 +31,8 @@ std::vector split_utf8_str(const std::string& str) { return result; } -std::vector split_str(const std::string& s, - const std::string& delim) { +std::vector split_str(const std::string &s, + const std::string &delim) { std::vector result; std::size_t start = 0, delim_len = delim.size(); while (true) { @@ -51,7 +51,7 @@ std::vector split_str(const std::string& s, return result; } -bool prefix_compare(const PathTrie* x, const PathTrie* y) { +bool prefix_compare(const PathTrie *x, const PathTrie *y) { if (x->score == y->score) { if (x->character == y->character) { return false; @@ -63,8 +63,8 @@ bool prefix_compare(const PathTrie* x, const PathTrie* y) { } } -void add_word_to_fst(const std::vector& word, - fst::StdVectorFst* dictionary) { +void add_word_to_fst(const std::vector &word, + fst::StdVectorFst *dictionary) { if (dictionary->NumStates() == 0) { fst::StdVectorFst::StateId start = dictionary->AddState(); assert(start == 0); @@ -81,16 +81,16 @@ void add_word_to_fst(const std::vector& word, } bool add_word_to_dictionary( - const std::string& word, - const std::unordered_map& char_map, + const std::string &word, + const std::unordered_map &char_map, bool add_space, int SPACE_ID, - fst::StdVectorFst* dictionary) { + fst::StdVectorFst *dictionary) { auto characters = split_utf8_str(word); std::vector int_word; - for (auto& c : 
characters) { + for (auto &c : characters) { if (c == " ") { int_word.push_back(SPACE_ID); } else { @@ -108,5 +108,5 @@ bool add_word_to_dictionary( } add_word_to_fst(int_word, dictionary); - return true; + return true; // return with successful adding } diff --git a/models/swig_decoders/decoder_utils.h b/models/swig_decoders/decoder_utils.h index 51985c86e..d4ee36e1b 100644 --- a/models/swig_decoders/decoder_utils.h +++ b/models/swig_decoders/decoder_utils.h @@ -14,12 +14,14 @@ bool pair_comp_first_rev(const std::pair &a, return a.first > b.first; } +// Function template for comparing two pairs template bool pair_comp_second_rev(const std::pair &a, const std::pair &b) { return a.second > b.second; } +// Return the sum of two probabilities in log scale template T log_sum_exp(const T &x, const T &y) { static T num_min = -std::numeric_limits::max(); @@ -32,18 +34,21 @@ T log_sum_exp(const T &x, const T &y) { // Functor for prefix comparsion bool prefix_compare(const PathTrie *x, const PathTrie *y); -// Get length of utf8 encoding string -// See: http://stackoverflow.com/a/4063229 +/* Get length of utf8 encoding string + * See: http://stackoverflow.com/a/4063229 + */ size_t get_utf8_str_len(const std::string &str); -// Split a string into a list of strings on a given string -// delimiter. NB: delimiters on beginning / end of string are -// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. +/* Split a string into a list of strings on a given string + * delimiter. NB: delimiters on beginning / end of string are + * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"]. + */ std::vector split_str(const std::string &s, const std::string &delim); -// Splits string into vector of strings representing -// UTF-8 characters (not same as chars) +/* Splits string into vector of strings representing + * UTF-8 characters (not same as chars) + */ std::vector split_utf8_str(const std::string &str); // Add a word in index to the dicionary of fst diff --git a/models/swig_decoders/path_trie.cpp b/models/swig_decoders/path_trie.cpp index 9e68c0f15..6a1f6170f 100644 --- a/models/swig_decoders/path_trie.cpp +++ b/models/swig_decoders/path_trie.cpp @@ -22,7 +22,7 @@ PathTrie::PathTrie() { _dictionary = nullptr; _dictionary_state = 0; _has_dictionary = false; - _matcher = nullptr; // finds arcs in FST + _matcher = nullptr; } PathTrie::~PathTrie() { diff --git a/models/swig_decoders/path_trie.h b/models/swig_decoders/path_trie.h index e581ca73c..6f150e420 100644 --- a/models/swig_decoders/path_trie.h +++ b/models/swig_decoders/path_trie.h @@ -10,27 +10,36 @@ using FSTMATCH = fst::SortedMatcher; +/* Trie tree for prefix storing and manipulating, with a dictionary in + * finite-state transducer for spelling correction. 
+ */ class PathTrie { public: PathTrie(); ~PathTrie(); + // get new prefix after appending new char PathTrie* get_path_trie(int new_char, bool reset = true); + // get the prefix in index from root to current node PathTrie* get_path_vec(std::vector& output); + // get the prefix in index from some stop node to current nodel PathTrie* get_path_vec(std::vector& output, int stop, size_t max_steps = std::numeric_limits::max()); + // update log probs void iterate_to_vec(std::vector& output); + // set dictionary for FST void set_dictionary(fst::StdVectorFst* dictionary); void set_matcher(std::shared_ptr matcher); bool is_empty() { return _ROOT == character; } + // remove current path from root void remove(); float log_prob_b_prev; @@ -49,8 +58,10 @@ private: std::vector> _children; + // pointer to dictionary of FST fst::StdVectorFst* _dictionary; fst::StdVectorFst::StateId _dictionary_state; + // true if finding ars in FST std::shared_ptr _matcher; }; diff --git a/models/swig_decoders/scorer.cpp b/models/swig_decoders/scorer.cpp index a713b0dff..75919c3c9 100644 --- a/models/swig_decoders/scorer.cpp +++ b/models/swig_decoders/scorer.cpp @@ -68,7 +68,7 @@ double Scorer::get_log_cond_prob(const std::vector& words) { state = out_state; out_state = tmp_state; } - // log10 prob + // return log10 prob return cond_prob; } @@ -189,23 +189,26 @@ void Scorer::fill_dictionary(bool add_space) { std::cerr << "Vocab Size " << vocab_size << std::endl; - // Simplify FST + /* Simplify FST - // This gets rid of "epsilon" transitions in the FST. - // These are transitions that don't require a string input to be taken. - // Getting rid of them is necessary to make the FST determinisitc, but - // can greatly increase the size of the FST + * This gets rid of "epsilon" transitions in the FST. + * These are transitions that don't require a string input to be taken. + * Getting rid of them is necessary to make the FST determinisitc, but + * can greatly increase the size of the FST + */ fst::RmEpsilon(&dictionary); fst::StdVectorFst* new_dict = new fst::StdVectorFst; - // This makes the FST deterministic, meaning for any string input there's - // only one possible state the FST could be in. It is assumed our - // dictionary is deterministic when using it. - // (lest we'd have to check for multiple transitions at each state) + /* This makes the FST deterministic, meaning for any string input there's + * only one possible state the FST could be in. It is assumed our + * dictionary is deterministic when using it. + * (lest we'd have to check for multiple transitions at each state) + */ fst::Determinize(dictionary, new_dict); - // Finds the simplest equivalent fst. This is unnecessary but decreases - // memory usage of the dictionary + /* Finds the simplest equivalent fst. This is unnecessary but decreases + * memory usage of the dictionary + */ fst::Minimize(new_dict); this->dictionary = new_dict; } diff --git a/models/swig_decoders/scorer.h b/models/swig_decoders/scorer.h index b99a99b72..1b4857e38 100644 --- a/models/swig_decoders/scorer.h +++ b/models/swig_decoders/scorer.h @@ -23,14 +23,15 @@ class RetriveStrEnumerateVocab : public lm::EnumerateVocab { public: RetriveStrEnumerateVocab() {} - void Add(lm::WordIndex index, const StringPiece& str) { + void Add(lm::WordIndex index, const StringPiece &str) { vocabulary.push_back(std::string(str.data(), str.length())); } std::vector vocabulary; }; -/* External scorer to query languange score for n-gram or sentence. 
+/* External scorer to query score for n-gram or sentence, including language + * model scoring and word insertion. * * Example: * Scorer scorer(alpha, beta, "path_of_language_model"); @@ -39,12 +40,12 @@ public: */ class Scorer { public: - Scorer(double alpha, double beta, const std::string& lm_path); + Scorer(double alpha, double beta, const std::string &lm_path); ~Scorer(); - double get_log_cond_prob(const std::vector& words); + double get_log_cond_prob(const std::vector &words); - double get_sent_log_prob(const std::vector& words); + double get_sent_log_prob(const std::vector &words); size_t get_max_order() { return _max_order; } @@ -56,32 +57,32 @@ public: void reset_params(float alpha, float beta); // make ngram - std::vector make_ngram(PathTrie* prefix); + std::vector make_ngram(PathTrie *prefix); // fill dictionary for fst void fill_dictionary(bool add_space); // set char map - void set_char_map(const std::vector& char_list); + void set_char_map(const std::vector &char_list); - std::vector split_labels(const std::vector& labels); + std::vector split_labels(const std::vector &labels); // expose to decoder double alpha; double beta; // fst dictionary - void* dictionary; + void *dictionary; protected: - void load_LM(const char* filename); + void load_LM(const char *filename); - double get_log_prob(const std::vector& words); + double get_log_prob(const std::vector &words); - std::string vec2str(const std::vector& input); + std::string vec2str(const std::vector &input); private: - void* _language_model; + void *_language_model; bool _is_character_based; size_t _max_order; From a00a436b528d33cc8d6e8b78c9f801c635c6f62e Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sun, 10 Sep 2017 11:00:16 +0800 Subject: [PATCH 181/335] Rewrite README.md doc (50%) and correct some bugs. --- README.md | 274 ++++++++++++++---- examples/librispeech/prepare_data.sh | 9 +- .../librispeech/{generate.sh => run_infer.sh} | 1 - examples/librispeech/run_train.sh | 2 +- examples/librispeech_tiny/prepare_data.sh | 39 +++ examples/librispeech_tiny/run_infer.sh | 27 ++ examples/librispeech_tiny/run_test.sh | 28 ++ examples/librispeech_tiny/run_train.sh | 30 ++ examples/librispeech_tiny/run_tune.sh | 30 ++ tools/build_vocab.py | 8 +- tools/compute_mean_std.py | 4 +- 11 files changed, 388 insertions(+), 64 deletions(-) rename examples/librispeech/{generate.sh => run_infer.sh} (97%) create mode 100644 examples/librispeech_tiny/prepare_data.sh create mode 100644 examples/librispeech_tiny/run_infer.sh create mode 100644 examples/librispeech_tiny/run_test.sh create mode 100644 examples/librispeech_tiny/run_train.sh create mode 100644 examples/librispeech_tiny/run_tune.sh diff --git a/README.md b/README.md index 1962c1ccb..2f51a5fc5 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ - [Inference and Evaluation](#inference-and-evaluation) - [Distributed Cloud Training](#distributed-cloud-training) - [Hyper-parameters Tuning](#hyper-parameters-tuning) +- [Training for Mandarin Language](#training-for-mandarin-language) - [Trying Live Demo with Your Own Voice](#trying-live-demo-with-your-own-voice) - [Experiments and Benchmarks](#experiments-and-benchmarks) - [Questions and Help](#questions-and-help) @@ -21,7 +22,7 @@ ## Installation -Please install the [prerequisites](#prerequisites) above before moving on this. +Please install the [prerequisites](#prerequisites) above before moving onto this quick installation. 
``` git clone https://github.com/PaddlePaddle/models.git @@ -31,138 +32,299 @@ sh setup.sh ## Getting Started -TODO +Several shell scripts provided in `./examples` will help us to quickly give it a try, including training, inferencing, evaluation and demo deployment. + +Most of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICE` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False. + +Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org/12/) for instance. + +- Go to directory + + ``` + cd examples/librispeech_tiny + ``` + + Notice that this is only a toy example with a tiny sampled set of LibriSpeech. If we would like to try with the complete LibriSpeech (would take much a longer time for training), please go to `examples/librispeech` instead. +- Prepare the libripseech data + + ``` + sh preprare_data.sh + ``` + + `prepare_data.sh` downloads dataset, generates file manifests, collects normalizer' statitics and builds vocabulary for us. Once the running is done, we'll find our LibriSpeech data (not full in this "tiny" example) downloaded in `~/.cache/paddle/dataset/speech/Libri` and several manifest files as well as one mean stddev file generated in `./data/librispeech_tiny`, for the further model training. It needs to be run for only once. +- Train your own ASR model + + ``` + sh run_train.sh + ``` + + `run_train.sh` starts a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints`. We can resume the training from these checkpoints, or use them for inference, evalutiaton and deployment. +- Case inference with an existing model + + ``` + sh run_infer.sh + ``` + + `run_infer.sh` will quickly show us speech-to-text decoding results for several (default: 10) audio samples with an existing model. Since the model is only trained on a subset of LibriSpeech, the performance might not be very good. We can download a well-trained model and then do the inference: + + ``` + sh download_model_run_infer.sh + ``` +- Evaluate an existing model + + ``` + sh run_test.sh + ``` + + `run_test.sh` evaluates the model with Word Error Rate (or Character Error Rate) measurement. Similarly, we can also download a well-trained model and test its performance: + + ``` + sh download_model_run_test.sh + ``` +- Try out a live demo with your own voice + + Until now, we have trained and tested an ASR model quantitively and qualitatively with existing audios. But we haven't try the model with our own speech. `demo_server.sh` and `demo_client.sh` helps quickly build up a demo ASR engine with the trained model, enabling us to test and play around with the demo with our own voice. + + We start the server in one console by entering: + + ``` + sh run_demo_server.sh + ``` + + and start the client in another console by entering: + + ``` + sh run_demo_client.sh + ``` + + Then, in the client console, press the `whitespace` key, hold, and start speaking. Until we finish our ulterance, we release the key to let the speech-to-text results show in the console. + + Notice that `run_demo_client.sh` must be run in a machine with a microphone device, while `run_demo_server.sh` could be run in one without any audio recording device, e.g. any remote server. Just be careful to update `run_demo_server.sh` and `run_demo_client.sh` with the actual accessable IP address and port, if the server and client are running with two seperate machines. 
Nothing has to be done if running in one single machine. + + This demo will first download a pre-trained Mandarin model (trained with 3000 hours of internal speech data). If we would like to try some other model, just update `model_path` argument in the script.   +     +More detailed information are provided in the following sections. + +Wish you a happy journey with the DeepSpeech2 ASR engine! + ## Data Preparation +#### Generate Manifest + +*DeepSpeech2 on PaddlePaddle* accepts a textual **manifest** file as its data set interface. A manifest file summarizes a set of speech data, with each line containing the meta data (e.g. filepath, transcription, duration) of one audio clip, in [JSON](http://www.json.org/) format, just as: + ``` -cd datasets -sh run_all.sh -cd .. +{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0001.flac", "duration": 3.275, "text": "stuff it into you his belly counselled him"} +{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0007.flac", "duration": 4.275, "text": "a cold lucid indifference reigned in his soul"} ``` -`sh run_all.sh` prepares all ASR datasets (currently, only LibriSpeech available). After running, we have several summarization manifest files in json-format. +To use any custom data, we only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels. -A manifest file summarizes a speech data set, with each line containing the meta data (i.e. audio filepath, transcript text, audio duration) of each audio file within the data set, in json format. Manifest file serves as an interface informing our system of where and what to read the speech samples. +For example script to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which download and generate manifests for LibriSpeech dataset. +#### Compute Mean & Stddev for Normalizer -More help for arguments: +To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with sampled training audios: ``` -python datasets/librispeech/librispeech.py --help +python tools/compute_mean_std.py \ +--num_samples 2000 \ +--specgram_type linear \ +--manifest_paths data/librispeech/manifest.train \ +--output_path data/librispeech/mean_std.npz ``` +It will compute the mean and standard deviation of power spectgram feature with 2000 random sampled audio clips listed in `data/librispeech/manifest.train` and save the results to `data/librispeech/mean_std.npz` for further usage. -``` -python tools/compute_mean_std.py -``` +#### Build Vocabulary -It will compute mean and stdandard deviation for audio features, and save them to a file with a default name `./mean_std.npz`. This file will be used in both training and inferencing. The default feature of audio data is power spectrum, and the mfcc feature is also supported. To train and infer based on mfcc feature, please generate this file by +A list of possible characters is required to convert the target transcription into list of token indices for training and in docoders convert from them back to text. Such a character-based vocabulary can be build with `tools/build_vocab.py`. 
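For intuition only, the build boils down to counting characters over the training transcripts and keeping the ones above the count threshold. A minimal sketch (assuming each manifest line is a JSON object with a `text` field, as in the manifest example above; the function name here is ours, not the tool's) might look like:

```python
import json
from collections import Counter


def build_char_vocab(manifest_path, vocab_path, count_threshold=0):
    """Collect characters from all transcripts; keep those above the threshold."""
    counter = Counter()
    for line in open(manifest_path):
        # Each manifest line is a JSON object with a "text" transcription field.
        counter.update(json.loads(line)["text"])
    with open(vocab_path, "w") as out:
        for char, count in sorted(counter.items()):
            if count > count_threshold:
                out.write(char + "\n")
```

The provided tool is run as shown next.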
``` -python tools/compute_mean_std.py --specgram_type mfcc +python tools/build_vocab.py \ +--count_threshold 0 \ +--vocab_path data/librispeech/eng_vocab.txt \ +--manifest_paths data/librispeech/manifest.train ``` -and specify ```--specgram_type mfcc``` when running train.py, infer.py, evaluator.py or tune.py. +It will build a vocabuary file of `data/librispeeech/eng_vocab.txt` with all transcription text in `data/librispeech/manifest.train`, without character truncation. + +#### More Help -More help for arguments: +For more help on arguments: ``` +python data/librispeech/librispeech.py --help python tools/compute_mean_std.py --help +python tools/build_vocab.py --help ``` ## Training a model -For GPU Training: +`train.py` is the main caller of the training module. We list several usage below. -``` -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py -``` +- Start training from scratch with 8 GPUs: -For CPU Training: + ``` + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py --trainer_count 8 + ``` -``` -python train.py --use_gpu False -``` +- Start training from scratch with 16 CPUs: + + ``` + python train.py --use_gpu False --trainer_count 16 + ``` +- Resume training from a checkpoint (an existing model): + + ``` + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py \ + --init_model_path CHECKPOINT_PATH_TO_RESUME_FROM + ``` -More help for arguments: +For more help on arguments: ``` python train.py --help ``` +or refer to `example/librispeech/run_train.sh. -### Inference and Evaluation +#### Augment the Dataset for Training -The following steps, inference, parameters tuning and evaluating, will require a language model during decoding. -A compressed language model is provided and can be accessed by +Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perterbation (label-invariant transformation) added upon raw audios. We don't have to do the syntheses by ourselves, as it is already embeded into the data provider and is done on the fly, randomly for each epoch. + +Six optional augmentation components are provided for us to configured and inserted into the processing pipeline. + + - Volume Perturbation + - Speed Perturbation + - Shifting Perturbation + - Online Beyesian normalization + - Noise Perturbation (need background noise audio files) + - Impulse Response (need impulse audio files) + +In order to inform the trainer of what augmentation components we need and what their processing orders are, we are required to prepare a *augmentation configuration file* in JSON format. For example: ``` -cd ./lm -sh run.sh -cd .. +[{ + "type": "speed", + "params": {"min_speed_rate": 0.95, + "max_speed_rate": 1.05}, + "prob": 0.6 +}, +{ + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 0.8 +}] ``` +When the `--augment_conf_file` argument of `trainer.py` is set to the path of the above example configuration file, each audio clip in each epoch will be processed: with 60% of chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with 80% of chance it will be shifted in time with a random sampled offset between -5 ms and 5 ms. Finally this newly synthesized audio clip will be feed into the feature extractor for further training. +For configuration examples, please refer to `conf/augmenatation.config.example`. 
-For GPU inference +Be careful when we are utilizing the data augmentation technique, as improper augmentation will instead do harm to the training, due to the enlarged train-test gap. -``` -CUDA_VISIBLE_DEVICES=0 python infer.py -``` +## Inference and Evaluation -For CPU inference +#### Prepare Language Model -``` -python infer.py --use_gpu=False -``` +A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. Please refer to `examples/librispeech/download_model.sh` and `examples/mandarin_demo/download_model.sh` for their urls. If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. + +TODO: any other requirements or tips to add? + +#### Speech-to-text Inference + +We provide a inference module `infer.py` to infer, decode and visualize speech-to-text results for several given audio clips, which might help to have a intuitive and qualitative evaluation of the ASR model performance. + +- Inference with GPU: + + ``` + CUDA_VISIBLE_DEVICES=0 python infer.py --trainer_count 1 + ``` -More help for arguments: +- Inference with CPU: + + ``` + python infer.py --use_gpu False + ``` + +We provide two CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilzied a heuristic breadth-first gragh search for arriving at a near global optimality; it requires a pre-trained KenLM language model for better scoring and ranking sentences. The decoder type can be set with argument `--decoding_method`. + +For more help on arguments: ``` python infer.py --help ``` +or refer to `example/librispeech/run_infer.sh. +#### Evaluate a Model -``` -CUDA_VISIBLE_DEVICES=0 python evaluate.py -``` +To evaluate a model quantitively, we can run: + +- Evaluation with GPU: + + ``` + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python test.py --trainer_count 8 + ``` + +- Evaluation with CPU: -More help for arguments: + ``` + python test.py --use_gpu False + ``` + +The error rate (default: word error rate, can be set with `--error_rate_type`) will be printed. + +For more help on arguments: ``` -python evaluate.py --help +python test.py --help ``` +or refer to `example/librispeech/run_test.sh. ## Hyper-parameters Tuning -Usually, the parameters $\alpha$ and $\beta$ for the CTC [prefix beam search](https://arxiv.org/abs/1408.2873) decoder need to be tuned after retraining the acoustic model. +The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It'd be better to re-tune them on validation samples after the accustic model is renewed. -For GPU tuning +`tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. We have to provide the range of $\alpha$ and $\beta$, as well as the number of attempts. 
-``` -CUDA_VISIBLE_DEVICES=0 python tune.py -``` +- Tuning with GPU: -For CPU tuning + ``` + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python tools/tune.py \ + --trainer_count 8 \ + --alpha_from 0.1 \ + --alpha_to 0.36 \ + --num_alphas 14 \ + --beta_from 0.05 \ + --beta_to 1.0 \ + --num_betas 20 + ``` -``` -python tune.py --use_gpu=False -``` +- Tuning with CPU: -More help for arguments: + ``` + python tools/tune.py --use_gpu False + ``` + +After tuning, we can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they can really improve the ASR performance. ``` python tune.py --help ``` +or refer to `example/librispeech/run_tune.sh. -Then reset parameters with the tuning result before inference or evaluating. +TODO: add figure. ## Distributed Cloud Training If you wish to train DeepSpeech2 on PaddleCloud, please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). +## Training for Mandarin Language + ## Trying Live Demo with Your Own Voice A real-time ASR demo is built for users to try out the ASR model with their own voice. Please do the following installation on the machine you'd like to run the demo's client (no need for the machine running the demo's server). diff --git a/examples/librispeech/prepare_data.sh b/examples/librispeech/prepare_data.sh index 162a38c49..a18402ea3 100644 --- a/examples/librispeech/prepare_data.sh +++ b/examples/librispeech/prepare_data.sh @@ -13,7 +13,14 @@ if [ $? -ne 0 ]; then exit 1 fi -#cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train +cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train + + +# build vocabulary (for English data, we can just skip this) +# python tools/build_vocab.py \ +# --count_threshold=0 \ +# --vocab_path='data/librispeech/eng_vocab.txt' \ +# --manifest_paths='data/librispeech/manifeset.train' # compute mean and stddev for normalizer diff --git a/examples/librispeech/generate.sh b/examples/librispeech/run_infer.sh similarity index 97% rename from examples/librispeech/generate.sh rename to examples/librispeech/run_infer.sh index a34b7bc10..619d546e8 100644 --- a/examples/librispeech/generate.sh +++ b/examples/librispeech/run_infer.sh @@ -8,7 +8,6 @@ python -u infer.py \ --trainer_count=1 \ --beam_size=500 \ --num_proc_bsearch=12 \ ---num_proc_data=12 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 832838a81..14672167c 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -6,7 +6,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u train.py \ --batch_size=256 \ --trainer_count=8 \ ---num_passes=200 \ +--num_passes=50 \ --num_proc_data=12 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ diff --git a/examples/librispeech_tiny/prepare_data.sh b/examples/librispeech_tiny/prepare_data.sh new file mode 100644 index 000000000..a18402ea3 --- /dev/null +++ b/examples/librispeech_tiny/prepare_data.sh @@ -0,0 +1,39 @@ +#! /usr/bin/bash + +pushd ../.. + +# download data, generate manifests +python data/librispeech/librispeech.py \ +--manifest_prefix='data/librispeech/manifest' \ +--full_download='True' \ +--target_dir='~/.cache/paddle/dataset/speech/Libri' + +if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." 
+ exit 1 +fi + +cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train + + +# build vocabulary (for English data, we can just skip this) +# python tools/build_vocab.py \ +# --count_threshold=0 \ +# --vocab_path='data/librispeech/eng_vocab.txt' \ +# --manifest_paths='data/librispeech/manifeset.train' + + +# compute mean and stddev for normalizer +python tools/compute_mean_std.py \ +--manifest_path='data/librispeech/manifest.train' \ +--num_samples=2000 \ +--specgram_type='linear' \ +--output_path='data/librispeech/mean_std.npz' + +if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." + exit 1 +fi + + +echo "LibriSpeech Data preparation done." diff --git a/examples/librispeech_tiny/run_infer.sh b/examples/librispeech_tiny/run_infer.sh new file mode 100644 index 000000000..619d546e8 --- /dev/null +++ b/examples/librispeech_tiny/run_infer.sh @@ -0,0 +1,27 @@ +#! /usr/bin/bash + +pushd ../.. + +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=500 \ +--num_proc_bsearch=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--infer_manifest='data/librispeech/manifest.dev-clean' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--model_path='checkpoints/params.latest.tar.gz' \ +--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' diff --git a/examples/librispeech_tiny/run_test.sh b/examples/librispeech_tiny/run_test.sh new file mode 100644 index 000000000..5a14cb682 --- /dev/null +++ b/examples/librispeech_tiny/run_test.sh @@ -0,0 +1,28 @@ +#! /usr/bin/bash + +pushd ../.. + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u evaluate.py \ +--batch_size=128 \ +--trainer_count=8 \ +--beam_size=500 \ +--num_proc_bsearch=12 \ +--num_proc_data=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--test_manifest='data/librispeech/manifest.test-clean' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--model_path='checkpoints/params.latest.tar.gz' \ +--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' diff --git a/examples/librispeech_tiny/run_train.sh b/examples/librispeech_tiny/run_train.sh new file mode 100644 index 000000000..14672167c --- /dev/null +++ b/examples/librispeech_tiny/run_train.sh @@ -0,0 +1,30 @@ +#! /usr/bin/bash + +pushd ../.. 
+ +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u train.py \ +--batch_size=256 \ +--trainer_count=8 \ +--num_passes=50 \ +--num_proc_data=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--num_iter_print=100 \ +--learning_rate=5e-4 \ +--max_duration=27.0 \ +--min_duration=0.0 \ +--use_sortagrad=True \ +--use_gru=False \ +--use_gpu=True \ +--is_local=True \ +--share_rnn_weights=True \ +--train_manifest='data/librispeech/manifest.train' \ +--dev_manifest='data/librispeech/manifest.dev' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--output_model_dir='./checkpoints' \ +--augment_conf_path='conf/augmentation.config' \ +--specgram_type='linear' \ +--shuffle_method='batch_shuffle_clipped' diff --git a/examples/librispeech_tiny/run_tune.sh b/examples/librispeech_tiny/run_tune.sh new file mode 100644 index 000000000..9d992e884 --- /dev/null +++ b/examples/librispeech_tiny/run_tune.sh @@ -0,0 +1,30 @@ +#! /usr/bin/bash + +pushd ../.. + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u tools/tune.py \ +--num_samples=100 \ +--trainer_count=8 \ +--beam_size=500 \ +--num_proc_bsearch=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--num_alphas=14 \ +--num_betas=20 \ +--alpha_from=0.1 \ +--alpha_to=0.36 \ +--beta_from=0.05 \ +--beta_to=1.0 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--tune_manifest='data/librispeech/manifest.dev-clean' \ +--mean_std_path='data/librispeech/mean_std.npz' \ +--vocab_path='data/librispeech/eng_vocab.txt' \ +--model_path='checkpoints/params.latest.tar.gz' \ +--lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--error_rate_type='wer' \ +--specgram_type='linear' diff --git a/tools/build_vocab.py b/tools/build_vocab.py index 6fbb9bdfc..ef9bde49f 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -21,8 +21,10 @@ add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable add_arg('count_threshold', int, 0, "Truncation threshold for char counts.") add_arg('vocab_path', str, - 'datasets/vocab/zh_vocab.txt', - "Filepath to write the vocabulary.") + None, + "Filepath to write the vocabulary.", + nargs='+', + required=True) add_arg('manifest_paths', str, None, "Filepaths of manifests for building vocabulary. " @@ -34,7 +36,7 @@ args = parser.parse_args() def count_manifest(counter, manifest_path): - manifest_jsons = utils.read_manifest(manifest_path) + manifest_jsons = read_manifest(manifest_path) for line_json in manifest_jsons: for char in line_json['text']: counter.update(char) diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py index 5bb6be39d..11aa856d7 100644 --- a/tools/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -20,10 +20,10 @@ add_arg('specgram_type', str, "Audio feature type. Options: linear, mfcc.", choices=['linear', 'mfcc']) add_arg('manifest_path', str, - 'datasets/manifest.train', + 'data/librispeech/manifest.train', "Filepath of manifest to compute normalizer's mean and stddev.") add_arg('output_path', str, - 'mean_std.npz', + 'data/librispeech/mean_std.npz', "Filepath of write mean and stddev to (.npz).") # yapf: disable args = parser.parse_args() From ae7ef7929a0bce79c5de03366840711e8e77f5b6 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sun, 10 Sep 2017 20:36:38 +0800 Subject: [PATCH 182/335] Rename some folders and update examples. 
--- data/librispeech/librispeech.py | 2 +- data/tiny/tiny.py | 126 ++++++++++++++++++ examples/librispeech/prepare_data.sh | 2 +- examples/librispeech_tiny/prepare_data.sh | 39 ------ examples/tiny/run_data.sh | 45 +++++++ .../{librispeech_tiny => tiny}/run_infer.sh | 12 +- .../{librispeech_tiny => tiny}/run_test.sh | 0 .../{librispeech_tiny => tiny}/run_train.sh | 20 +-- .../{librispeech_tiny => tiny}/run_tune.sh | 0 infer.py | 6 +- {lm => model_utils}/__init__.py | 0 {models => model_utils}/decoder.py | 2 + {lm => model_utils}/lm_scorer.py | 0 {models => model_utils}/model.py | 7 +- {models => model_utils}/network.py | 0 .../tests/test_decoders.py | 2 +- models/__init__.py | 0 lm/run.sh => models/lm/download_en.sh | 3 - test.py | 6 +- tools/build_vocab.py | 6 +- tools/tune.py | 6 +- train.py | 4 +- 22 files changed, 209 insertions(+), 79 deletions(-) create mode 100644 data/tiny/tiny.py delete mode 100644 examples/librispeech_tiny/prepare_data.sh create mode 100644 examples/tiny/run_data.sh rename examples/{librispeech_tiny => tiny}/run_infer.sh (58%) rename examples/{librispeech_tiny => tiny}/run_test.sh (100%) rename examples/{librispeech_tiny => tiny}/run_train.sh (56%) rename examples/{librispeech_tiny => tiny}/run_tune.sh (100%) rename {lm => model_utils}/__init__.py (100%) rename {models => model_utils}/decoder.py (99%) rename {lm => model_utils}/lm_scorer.py (100%) rename {models => model_utils}/model.py (97%) rename {models => model_utils}/network.py (100%) rename {models => model_utils}/tests/test_decoders.py (99%) delete mode 100644 models/__init__.py rename lm/run.sh => models/lm/download_en.sh (99%) diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index d963a7d53..14a3804e2 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -41,7 +41,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--target_dir", - default=DATA_HOME + "/Libri", + default=DATA_HOME + "/libri", type=str, help="Directory to save the dataset. (default: %(default)s)") parser.add_argument( diff --git a/data/tiny/tiny.py b/data/tiny/tiny.py new file mode 100644 index 000000000..8ba2a13c5 --- /dev/null +++ b/data/tiny/tiny.py @@ -0,0 +1,126 @@ +"""Prepare Librispeech ASR datasets. + +Download, unpack and create manifest files. +Manifest file is a json-format file with each line containing the +meta data (i.e. audio filepath, transcript and audio duration) +of each audio file in the data set. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import distutils.util +import os +import sys +import tarfile +import argparse +import soundfile +import json +import codecs +from paddle.v2.dataset.common import md5file + +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') + +URL_ROOT = "http://www.openslr.org/resources/12" +URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz" +MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1" + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--target_dir", + default=DATA_HOME + "/tiny", + type=str, + help="Directory to save the dataset. (default: %(default)s)") +parser.add_argument( + "--manifest_prefix", + default="manifest", + type=str, + help="Filepath prefix for output manifests. 
(default: %(default)s)") +args = parser.parse_args() + + +def download(url, md5sum, target_dir): + """ + Download file from url to target_dir, and check md5sum. + """ + if not os.path.exists(target_dir): os.makedirs(target_dir) + filepath = os.path.join(target_dir, url.split("/")[-1]) + if not (os.path.exists(filepath) and md5file(filepath) == md5sum): + print("Downloading %s ..." % url) + os.system("wget -c " + url + " -P " + target_dir) + print("\nMD5 Chesksum %s ..." % filepath) + if not md5file(filepath) == md5sum: + raise RuntimeError("MD5 checksum failed.") + else: + print("File exists, skip downloading. (%s)" % filepath) + return filepath + + +def unpack(filepath, target_dir): + """ + Unpack the file to the target_dir. + """ + print("Unpacking %s ..." % filepath) + tar = tarfile.open(filepath) + tar.extractall(target_dir) + tar.close() + + +def create_manifest(data_dir, manifest_path): + """ + Create a manifest json file summarizing the data set, with each line + containing the meta data (i.e. audio filepath, transcription text, audio + duration) of each audio file within the data set. + """ + print("Creating manifest %s ..." % manifest_path) + json_lines = [] + for subfolder, _, filelist in sorted(os.walk(data_dir)): + text_filelist = [ + filename for filename in filelist if filename.endswith('trans.txt') + ] + if len(text_filelist) > 0: + text_filepath = os.path.join(data_dir, subfolder, text_filelist[0]) + for line in open(text_filepath): + segments = line.strip().split() + text = ' '.join(segments[1:]).lower() + audio_filepath = os.path.join(data_dir, subfolder, + segments[0] + '.flac') + audio_data, samplerate = soundfile.read(audio_filepath) + duration = float(len(audio_data)) / samplerate + json_lines.append( + json.dumps({ + 'audio_filepath': audio_filepath, + 'duration': duration, + 'text': text + })) + with codecs.open(manifest_path, 'w', 'utf-8') as out_file: + for line in json_lines: + out_file.write(line + '\n') + + +def prepare_dataset(url, md5sum, target_dir, manifest_path): + """ + Download, unpack and create summmary manifest file. + """ + if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): + # download + filepath = download(url, md5sum, target_dir) + # unpack + unpack(filepath, target_dir) + else: + print("Skip downloading and unpacking. Data already exists in %s." % + target_dir) + # create manifest json file + create_manifest(target_dir, manifest_path) + + +def main(): + prepare_dataset( + url=URL_DEV_CLEAN, + md5sum=MD5_DEV_CLEAN, + target_dir=os.path.join(args.target_dir, "dev-clean"), + manifest_path=args.manifest_prefix + ".dev-clean") + + +if __name__ == '__main__': + main() diff --git a/examples/librispeech/prepare_data.sh b/examples/librispeech/prepare_data.sh index a18402ea3..6e9997703 100644 --- a/examples/librispeech/prepare_data.sh +++ b/examples/librispeech/prepare_data.sh @@ -16,7 +16,7 @@ fi cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train -# build vocabulary (for English data, we can just skip this) +# build vocabulary (can be skipped for English, as already provided) # python tools/build_vocab.py \ # --count_threshold=0 \ # --vocab_path='data/librispeech/eng_vocab.txt' \ diff --git a/examples/librispeech_tiny/prepare_data.sh b/examples/librispeech_tiny/prepare_data.sh deleted file mode 100644 index a18402ea3..000000000 --- a/examples/librispeech_tiny/prepare_data.sh +++ /dev/null @@ -1,39 +0,0 @@ -#! /usr/bin/bash - -pushd ../.. 
- -# download data, generate manifests -python data/librispeech/librispeech.py \ ---manifest_prefix='data/librispeech/manifest' \ ---full_download='True' \ ---target_dir='~/.cache/paddle/dataset/speech/Libri' - -if [ $? -ne 0 ]; then - echo "Prepare LibriSpeech failed. Terminated." - exit 1 -fi - -cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train - - -# build vocabulary (for English data, we can just skip this) -# python tools/build_vocab.py \ -# --count_threshold=0 \ -# --vocab_path='data/librispeech/eng_vocab.txt' \ -# --manifest_paths='data/librispeech/manifeset.train' - - -# compute mean and stddev for normalizer -python tools/compute_mean_std.py \ ---manifest_path='data/librispeech/manifest.train' \ ---num_samples=2000 \ ---specgram_type='linear' \ ---output_path='data/librispeech/mean_std.npz' - -if [ $? -ne 0 ]; then - echo "Compute mean and stddev failed. Terminated." - exit 1 -fi - - -echo "LibriSpeech Data preparation done." diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh new file mode 100644 index 000000000..44345d8cc --- /dev/null +++ b/examples/tiny/run_data.sh @@ -0,0 +1,45 @@ +#! /usr/bin/bash + +pushd ../.. + +# download data, generate manifests +python data/tiny/tiny.py \ +--manifest_prefix='data/tiny/manifest' \ +--target_dir=$HOME'/.cache/paddle/dataset/speech/tiny' + +if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." + exit 1 +fi + +cat data/tiny/manifest.dev-clean | head -n 32 > data/tiny/manifest.train +cat data/tiny/manifest.dev-clean | head -n 48 | tail -n 16 > data/tiny/manifest.dev +cat data/tiny/manifest.dev-clean | head -n 64 | tail -n 16 > data/tiny/manifest.test + + +# build vocabulary +python tools/build_vocab.py \ +--count_threshold=0 \ +--vocab_path='data/tiny/vocab.txt' \ +--manifest_paths='data/tiny/manifest.train' + +if [ $? -ne 0 ]; then + echo "Build vocabulary failed. Terminated." + exit 1 +fi + + +# compute mean and stddev for normalizer +python tools/compute_mean_std.py \ +--manifest_path='data/tiny/manifest.train' \ +--num_samples=32 \ +--specgram_type='linear' \ +--output_path='data/tiny/mean_std.npz' + +if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." + exit 1 +fi + + +echo "Tiny data preparation done." diff --git a/examples/librispeech_tiny/run_infer.sh b/examples/tiny/run_infer.sh similarity index 58% rename from examples/librispeech_tiny/run_infer.sh rename to examples/tiny/run_infer.sh index 619d546e8..f09bc6638 100644 --- a/examples/librispeech_tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -4,7 +4,7 @@ pushd ../.. 
CUDA_VISIBLE_DEVICES=0 \ python -u infer.py \ ---num_samples=10 \ +--num_samples=4 \ --trainer_count=1 \ --beam_size=500 \ --num_proc_bsearch=12 \ @@ -17,11 +17,11 @@ python -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/librispeech/manifest.dev-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---model_path='checkpoints/params.latest.tar.gz' \ ---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--infer_manifest='data/tiny/manifest.train' \ +--mean_std_path='data/tiny/mean_std.npz' \ +--vocab_path='data/tiny/vocab.txt' \ +--model_path='checkpoints/params.pass-14.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --error_rate_type='wer' \ --specgram_type='linear' diff --git a/examples/librispeech_tiny/run_test.sh b/examples/tiny/run_test.sh similarity index 100% rename from examples/librispeech_tiny/run_test.sh rename to examples/tiny/run_test.sh diff --git a/examples/librispeech_tiny/run_train.sh b/examples/tiny/run_train.sh similarity index 56% rename from examples/librispeech_tiny/run_train.sh rename to examples/tiny/run_train.sh index 14672167c..7ca336876 100644 --- a/examples/librispeech_tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -2,17 +2,17 @@ pushd ../.. -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +CUDA_VISIBLE_DEVICES=0,1 \ python -u train.py \ ---batch_size=256 \ ---trainer_count=8 \ ---num_passes=50 \ ---num_proc_data=12 \ +--batch_size=2 \ +--trainer_count=1 \ +--num_passes=10 \ +--num_proc_data=1 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ --num_iter_print=100 \ ---learning_rate=5e-4 \ +--learning_rate=5e-5 \ --max_duration=27.0 \ --min_duration=0.0 \ --use_sortagrad=True \ @@ -20,10 +20,10 @@ python -u train.py \ --use_gpu=True \ --is_local=True \ --share_rnn_weights=True \ ---train_manifest='data/librispeech/manifest.train' \ ---dev_manifest='data/librispeech/manifest.dev' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ +--train_manifest='data/tiny/manifest.train' \ +--dev_manifest='data/tiny/manifest.train' \ +--mean_std_path='data/tiny/mean_std.npz' \ +--vocab_path='data/tiny/vocab.txt' \ --output_model_dir='./checkpoints' \ --augment_conf_path='conf/augmentation.config' \ --specgram_type='linear' \ diff --git a/examples/librispeech_tiny/run_tune.sh b/examples/tiny/run_tune.sh similarity index 100% rename from examples/librispeech_tiny/run_tune.sh rename to examples/tiny/run_tune.sh diff --git a/infer.py b/infer.py index 1ce969ae0..73e200b49 100644 --- a/infer.py +++ b/infer.py @@ -7,7 +7,7 @@ import argparse import functools import paddle.v2 as paddle from data_utils.data import DataGenerator -from models.model import DeepSpeech2Model +from model_utils.model import DeepSpeech2Model from utils.error_rate import wer, cer from utils.utility import add_arguments, print_arguments @@ -35,10 +35,10 @@ add_arg('mean_std_path', str, 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'data/librispeech/eng_vocab.txt', + 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', + 'model_zoo/lm/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', diff --git a/lm/__init__.py b/model_utils/__init__.py 
similarity index 100% rename from lm/__init__.py rename to model_utils/__init__.py diff --git a/models/decoder.py b/model_utils/decoder.py similarity index 99% rename from models/decoder.py rename to model_utils/decoder.py index 61ead25c8..ffba2731a 100644 --- a/models/decoder.py +++ b/model_utils/decoder.py @@ -180,6 +180,8 @@ def ctc_beam_search_decoder(probs_seq, prob = prob * ext_scoring_func(result) log_prob = log(prob) beam_result.append((log_prob, result)) + else: + beam_result.append((float('-inf'), '')) ## output top beam_size decoding results beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) diff --git a/lm/lm_scorer.py b/model_utils/lm_scorer.py similarity index 100% rename from lm/lm_scorer.py rename to model_utils/lm_scorer.py diff --git a/models/model.py b/model_utils/model.py similarity index 97% rename from models/model.py rename to model_utils/model.py index 93c4c41bf..cf146f8ce 100644 --- a/models/model.py +++ b/model_utils/model.py @@ -8,9 +8,10 @@ import os import time import gzip import paddle.v2 as paddle -from lm.lm_scorer import LmScorer -from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder -from models.network import deep_speech_v2_network +from model_utils.lm_scorer import LmScorer +from model_utils.decoder import ctc_greedy_decoder, ctc_beam_search_decoder +from model_utils.decoder import ctc_beam_search_decoder_batch +from model_utils.network import deep_speech_v2_network class DeepSpeech2Model(object): diff --git a/models/network.py b/model_utils/network.py similarity index 100% rename from models/network.py rename to model_utils/network.py diff --git a/models/tests/test_decoders.py b/model_utils/tests/test_decoders.py similarity index 99% rename from models/tests/test_decoders.py rename to model_utils/tests/test_decoders.py index acce46af8..adf36eefc 100644 --- a/models/tests/test_decoders.py +++ b/model_utils/tests/test_decoders.py @@ -4,7 +4,7 @@ from __future__ import division from __future__ import print_function import unittest -from models import decoder +from model_utils import decoder class TestDecoders(unittest.TestCase): diff --git a/models/__init__.py b/models/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/lm/run.sh b/models/lm/download_en.sh similarity index 99% rename from lm/run.sh rename to models/lm/download_en.sh index 2108ea55f..5ca33c679 100644 --- a/lm/run.sh +++ b/models/lm/download_en.sh @@ -14,6 +14,3 @@ if [ $MD5 != $md5_tmp ]; then echo "Fail to download the language model!" 
exit 1 fi - - - diff --git a/test.py b/test.py index 747e40df8..791bfd585 100644 --- a/test.py +++ b/test.py @@ -7,7 +7,7 @@ import argparse import functools import paddle.v2 as paddle from data_utils.data import DataGenerator -from models.model import DeepSpeech2Model +from model_utils.model import DeepSpeech2Model from utils.error_rate import wer, cer from utils.utility import add_arguments, print_arguments @@ -36,14 +36,14 @@ add_arg('mean_std_path', str, 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'data/librispeech/eng_vocab.txt', + 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', + 'model_zoo/lm/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") add_arg('decoding_method', str, 'ctc_beam_search', diff --git a/tools/build_vocab.py b/tools/build_vocab.py index ef9bde49f..e167e92ad 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -21,10 +21,8 @@ add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable add_arg('count_threshold', int, 0, "Truncation threshold for char counts.") add_arg('vocab_path', str, - None, - "Filepath to write the vocabulary.", - nargs='+', - required=True) + 'data/librispeech/vocab.txt', + "Filepath to write the vocabulary.") add_arg('manifest_paths', str, None, "Filepaths of manifests for building vocabulary. " diff --git a/tools/tune.py b/tools/tune.py index 7a2379109..25e495f19 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -9,7 +9,7 @@ import functools import paddle.v2 as paddle import _init_paths from data_utils.data import DataGenerator -from models.model import DeepSpeech2Model +from model_utils.model import DeepSpeech2Model from utils.error_rate import wer from utils.utility import add_arguments, print_arguments @@ -41,10 +41,10 @@ add_arg('mean_std_path', str, 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'data/librispeech/eng_vocab.txt', + 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('lang_model_path', str, - 'lm/data/common_crawl_00.prune01111.trie.klm', + 'model_zoo/lm/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") add_arg('model_path', str, './checkpoints/params.latest.tar.gz', diff --git a/train.py b/train.py index 4a7a0eda2..bbf1cd729 100644 --- a/train.py +++ b/train.py @@ -6,7 +6,7 @@ from __future__ import print_function import argparse import functools import paddle.v2 as paddle -from models.model import DeepSpeech2Model +from model_utils.model import DeepSpeech2Model from data_utils.data import DataGenerator from utils.utility import add_arguments, print_arguments @@ -41,7 +41,7 @@ add_arg('mean_std_path', str, 'data/librispeech/mean_std.npz', "Filepath of normalizer's mean & std.") add_arg('vocab_path', str, - 'data/librispeech/eng_vocab.txt', + 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('init_model_path', str, None, From e11b735de5ba55f90f502c67026d94dd78e02226 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 12 Sep 2017 00:51:13 +0800 Subject: [PATCH 183/335] Update examples scripts and REAME.md for DS2. 
--- README.md | 96 ++++++++++--------- data/librispeech/eng_vocab.txt | 28 ------ data/librispeech/librispeech.py | 31 +++--- deploy/demo_server.py | 2 +- .../{prepare_data.sh => run_data.sh} | 24 +++-- examples/librispeech/run_infer.sh | 30 ++++-- examples/librispeech/run_infer_golden.sh | 54 +++++++++++ examples/librispeech/run_test.sh | 32 +++++-- examples/librispeech/run_test_golden.sh | 55 +++++++++++ examples/librispeech/run_train.sh | 17 +++- examples/librispeech/run_tune.sh | 17 +++- examples/mandarin/run_demo_client.sh | 17 ++++ examples/mandarin/run_demo_server.sh | 53 ++++++++++ examples/tiny/run_data.sh | 18 ++-- examples/tiny/run_infer.sh | 28 +++++- examples/tiny/run_infer_golden.sh | 54 +++++++++++ examples/tiny/run_test.sh | 38 ++++++-- examples/tiny/run_test_golden.sh | 55 +++++++++++ examples/tiny/run_train.sh | 27 ++++-- examples/tiny/run_tune.sh | 21 ++-- models/librispeech/download_model.sh | 20 ++++ models/lm/download_en.sh | 16 ---- models/lm/download_lm_en.sh | 18 ++++ utils/utility.sh | 20 ++++ 24 files changed, 594 insertions(+), 177 deletions(-) delete mode 100644 data/librispeech/eng_vocab.txt rename examples/librispeech/{prepare_data.sh => run_data.sh} (57%) create mode 100644 examples/librispeech/run_infer_golden.sh create mode 100644 examples/librispeech/run_test_golden.sh create mode 100644 examples/mandarin/run_demo_client.sh create mode 100644 examples/mandarin/run_demo_server.sh create mode 100644 examples/tiny/run_infer_golden.sh create mode 100644 examples/tiny/run_test_golden.sh create mode 100644 models/librispeech/download_model.sh delete mode 100644 models/lm/download_en.sh create mode 100644 models/lm/download_lm_en.sh create mode 100644 utils/utility.sh diff --git a/README.md b/README.md index 2f51a5fc5..aae0dc6d8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # DeepSpeech2 on PaddlePaddle -*DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech-to-text, via an easy-to-use, efficent and scalable integreted implementation, including training & inferencing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin speech are also released. +*DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech-to-text, via an easy-to-use, efficent and scalable integreted implementation, including training, inferencing & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. 
## Table of Contents - [Prerequisites](#prerequisites) @@ -8,12 +8,14 @@ - [Getting Started](#getting-started) - [Data Preparation](#data-preparation) - [Training a Model](#training-a-model) +- [Data Augmentation Pipeline](#data-augmentation-pipeline) - [Inference and Evaluation](#inference-and-evaluation) - [Distributed Cloud Training](#distributed-cloud-training) - [Hyper-parameters Tuning](#hyper-parameters-tuning) - [Training for Mandarin Language](#training-for-mandarin-language) - [Trying Live Demo with Your Own Voice](#trying-live-demo-with-your-own-voice) - [Experiments and Benchmarks](#experiments-and-benchmarks) +- [Released Models](#released-models) - [Questions and Help](#questions-and-help) ## Prerequisites @@ -22,7 +24,7 @@ ## Installation -Please install the [prerequisites](#prerequisites) above before moving onto this quick installation. +Please install the [prerequisites](#prerequisites) above before moving on. ``` git clone https://github.com/PaddlePaddle/models.git @@ -32,43 +34,43 @@ sh setup.sh ## Getting Started -Several shell scripts provided in `./examples` will help us to quickly give it a try, including training, inferencing, evaluation and demo deployment. +Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference, model evaluation and demo deployment, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](https://github.com/kaldi-asr/kaldi/tree/master/egs/aishell)). Reading these examples will also help us understand how to make it work with our own data. -Most of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICE` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False. +Some of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICE` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False to use CPUs instead. Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org/12/) for instance. - Go to directory ``` - cd examples/librispeech_tiny + cd examples/tiny ``` - Notice that this is only a toy example with a tiny sampled set of LibriSpeech. If we would like to try with the complete LibriSpeech (would take much a longer time for training), please go to `examples/librispeech` instead. -- Prepare the libripseech data + Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If we would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead. +- Prepare the data ``` - sh preprare_data.sh + sh run_data.sh ``` - `prepare_data.sh` downloads dataset, generates file manifests, collects normalizer' statitics and builds vocabulary for us. Once the running is done, we'll find our LibriSpeech data (not full in this "tiny" example) downloaded in `~/.cache/paddle/dataset/speech/Libri` and several manifest files as well as one mean stddev file generated in `./data/librispeech_tiny`, for the further model training. It needs to be run for only once. + `run_data.sh` will download dataset, generate manifests, collect normalizer' statitics and build vocabulary. 
Once the data preparation is done, we will find the data (only part of LibriSpeech) downloaded in `~/.cache/paddle/dataset/speech/libri` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time we run this dataset and is reusable for all further experiments. - Train your own ASR model ``` sh run_train.sh ``` - `run_train.sh` starts a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints`. We can resume the training from these checkpoints, or use them for inference, evalutiaton and deployment. + `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. We can resume the training from these checkpoints, or use them for inference, evalutiaton and deployment. - Case inference with an existing model ``` sh run_infer.sh ``` - `run_infer.sh` will quickly show us speech-to-text decoding results for several (default: 10) audio samples with an existing model. Since the model is only trained on a subset of LibriSpeech, the performance might not be very good. We can download a well-trained model and then do the inference: + `run_infer.sh` will show us some speech-to-text decoding results for several (default: 10) samples with the trained model. The performance might not be good now as the current model is only trained with a toy subset of LibriSpeech. To see the results with a better model, we can download a well-trained (trained for several days, with the complete LibriSpeech) model and do the inference: ``` - sh download_model_run_infer.sh + sh run_infer_golden.sh ``` - Evaluate an existing model @@ -76,14 +78,14 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org sh run_test.sh ``` - `run_test.sh` evaluates the model with Word Error Rate (or Character Error Rate) measurement. Similarly, we can also download a well-trained model and test its performance: + `run_test.sh` will evaluate the model with Word Error Rate (or Character Error Rate) measurement. Similarly, we can also download a well-trained model and test its performance: ``` - sh download_model_run_test.sh + sh run_test_golden.sh ``` - Try out a live demo with your own voice - Until now, we have trained and tested an ASR model quantitively and qualitatively with existing audios. But we haven't try the model with our own speech. `demo_server.sh` and `demo_client.sh` helps quickly build up a demo ASR engine with the trained model, enabling us to test and play around with the demo with our own voice. + Until now, we have trained and tested our ASR model qualitatively (`run_infer.sh`) and quantitively (`run_test.sh`) with existing audio files. But we have not yet play the model with our own speech. `demo_server.sh` and `demo_client.sh` helps quickly build up a demo ASR engine with the trained model, enabling us to test and play around with the demo with our own voice. We start the server in one console by entering: @@ -112,20 +114,20 @@ Wish you a happy journey with the DeepSpeech2 ASR engine! #### Generate Manifest -*DeepSpeech2 on PaddlePaddle* accepts a textual **manifest** file as its data set interface. A manifest file summarizes a set of speech data, with each line containing the meta data (e.g. 
filepath, transcription, duration) of one audio clip, in [JSON](http://www.json.org/) format, just as:
+*DeepSpeech2 on PaddlePaddle* accepts a textual **manifest** file as its data set interface. A manifest file summarizes a set of speech data, with each line containing some meta data (e.g. filepath, transcription, duration) of one audio clip, in [JSON](http://www.json.org/) format, such as:

```
{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0001.flac", "duration": 3.275, "text": "stuff it into you his belly counselled him"}
{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0007.flac", "duration": 4.275, "text": "a cold lucid indifference reigned in his soul"}
```

-To use any custom data, we only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels.
+To use your custom data, you only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels.

-For example script to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which download and generate manifests for LibriSpeech dataset.
+For how to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which downloads and generates manifests for the LibriSpeech dataset.

#### Compute Mean & Stddev for Normalizer

-To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with sampled training audios:
+To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with some training samples:

```
python tools/compute_mean_std.py \
@@ -140,7 +142,7 @@ It will compute the mean and standard deviation of power spectgram feature with

#### Build Vocabulary

-A list of possible characters is required to convert the target transcription into list of token indices for training and in docoders convert from them back to text. Such a character-based vocabulary can be build with `tools/build_vocab.py`.
+A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in decoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be built with `tools/build_vocab.py`.

```
python tools/build_vocab.py \
@@ -149,7 +151,7 @@ python tools/build_vocab.py \
--manifest_paths data/librispeech/manifest.train
```

-It will build a vocabuary file of `data/librispeeech/eng_vocab.txt` with all transcription text in `data/librispeech/manifest.train`, without character truncation.
+It will write a vocabulary file `data/librispeeech/eng_vocab.txt` with all transcription text in `data/librispeech/manifest.train`, without vocabulary truncation (`--count_threshold 0`).

#### More Help

@@ -163,7 +165,7 @@ python tools/build_vocab.py --help

## Training a model

-`train.py` is the main caller of the training module. We list several usage below.
+`train.py` is the main caller of the training module. We show several examples of usage below.
- Start training from scratch with 8 GPUs:
@@ -176,7 +178,7 @@
    ```
    python train.py --use_gpu False --trainer_count 16
    ```
-- Resume training from a checkpoint (an existing model):
+- Resume training from a checkpoint:

    ```
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py \
    --init_model_path CHECKPOINT_PATH_TO_RESUME_FROM
    ```

For more help on arguments:

```
python train.py --help
```
-or refer to `example/librispeech/run_train.sh.
+or refer to `example/librispeech/run_train.sh`.

-#### Augment the Dataset for Training
+## Data Augmentation Pipeline

-Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perterbation (label-invariant transformation) added upon raw audios. We don't have to do the syntheses by ourselves, as it is already embeded into the data provider and is done on the fly, randomly for each epoch.
+Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perturbation (label-invariant transformation) added upon raw audios. We don't have to do the syntheses by ourselves, as it is already embedded into the data provider and is done on the fly, randomly for each epoch during training.

Six optional augmentation components are provided for us to configure and insert into the processing pipeline.

@@ -203,7 +205,7 @@ Six optional augmentation components are provided for us to configured and inser
- Noise Perturbation (need background noise audio files)
- Impulse Response (need impulse audio files)

-In order to inform the trainer of what augmentation components we need and what their processing orders are, we are required to prepare a *augmentation configuration file* in JSON format. For example:
+In order to inform the trainer of what augmentation components we need and what their processing orders are, we are required to prepare an *augmentation configuration file* in [JSON](http://www.json.org/) format. For example:

```
[{
@@ -220,23 +222,23 @@ In order to inform the trainer of what augmentation components we need and what
}]
```

-When the `--augment_conf_file` argument of `trainer.py` is set to the path of the above example configuration file, each audio clip in each epoch will be processed: with 60% of chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with 80% of chance it will be shifted in time with a random sampled offset between -5 ms and 5 ms. Finally this newly synthesized audio clip will be feed into the feature extractor for further training.
+When the `--augment_conf_file` argument of `trainer.py` is set to the path of the above example configuration file, every audio clip in every epoch will be processed: with 60% of chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with 80% of chance it will be shifted in time with a random sampled offset between -5 ms and 5 ms. Finally this newly synthesized audio clip will be fed into the feature extractor for further training.

-For configuration examples, please refer to `conf/augmenatation.config.example`.
+For other configuration examples, please refer to `conf/augmenatation.config.example`.

-Be careful when we are utilizing the data augmentation technique, as improper augmentation will instead do harm to the training, due to the enlarged train-test gap.
+Be careful when we are utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap.

## Inference and Evaluation

-#### Prepare Language Model
+### Prepare Language Model

-A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. Please refer to `examples/librispeech/download_model.sh` and `examples/mandarin_demo/download_model.sh` for their urls. If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials.
+A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. Please refer to `models/lm/download_lm_en.sh` and `models/lm/download_lm_zh.sh` for their URLs. If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials.

TODO: any other requirements or tips to add?

-#### Speech-to-text Inference
+### Speech-to-text Inference

-We provide a inference module `infer.py` to infer, decode and visualize speech-to-text results for several given audio clips, which might help to have a intuitive and qualitative evaluation of the ASR model performance.
+We provide an inference module `infer.py` to infer, decode and visualize speech-to-text results for several given audio clips. It might help us to have an intuitive and qualitative evaluation of the ASR model's performance.

- Inference with GPU:

    ```
    CUDA_VISIBLE_DEVICES=0 python infer.py --trainer_count 1
    ```

- Inference with CPU:

    ```
-    python infer.py --use_gpu False
+    python infer.py --use_gpu False --trainer_count 12
    ```

-We provide two CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilzied a heuristic breadth-first gragh search for arriving at a near global optimality; it requires a pre-trained KenLM language model for better scoring and ranking sentences. The decoder type can be set with argument `--decoding_method`.
+We provide two types of CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilizes a heuristic breadth-first graph search for reaching a near global optimality; it also requires a pre-trained KenLM language model for better scoring and ranking. The decoder type can be set with argument `--decoding_method`.

For more help on arguments:

```
python infer.py --help
```
-or refer to `example/librispeech/run_infer.sh.
+or refer to `example/librispeech/run_infer.sh`.
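As a rough illustration of the best-path (greedy) rule described above — a minimal sketch only, assuming the blank token occupies the last index of each probability row, and not the project's actual `ctc_greedy_decoder` implementation:

```
import numpy as np

def ctc_greedy_decode(probs_seq, vocabulary, blank_index=None):
    # Best-path decoding: pick the most likely token per timestep,
    # collapse consecutive repeats, then drop the blank symbol.
    if blank_index is None:
        blank_index = len(vocabulary)  # assumption: blank is the last index
    best_path = np.argmax(probs_seq, axis=1)
    decoded, previous = [], None
    for idx in best_path:
        if idx != previous and idx != blank_index:
            decoded.append(vocabulary[idx])
        previous = idx
    return ''.join(decoded)

# toy example: 4 timesteps over vocabulary ['a', 'b'] plus a trailing blank
probs = np.array([[0.6, 0.3, 0.1],
                  [0.6, 0.3, 0.1],
                  [0.1, 0.2, 0.7],
                  [0.2, 0.7, 0.1]])
print(ctc_greedy_decode(probs, ['a', 'b']))  # -> "ab"
```

Note how the repeat-then-blank collapse makes the rule purely local, which is exactly why it is fast but only locally optimal compared with beam search.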
-#### Evaluate a Model
+### Evaluate a Model

-To evaluate a model quantitively, we can run:
+To evaluate a model's performance quantitatively, we can run:

- Evaluation with GPU:

    ```
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python test.py --trainer_count 8
    ```

- Evaluation with CPU:

    ```
-    python test.py --use_gpu False
+    python test.py --use_gpu False --trainer_count 12
    ```

-The error rate (default: word error rate, can be set with `--error_rate_type`) will be printed.
+The error rate (default: word error rate; can be set with `--error_rate_type`) will be printed.

For more help on arguments:

```
python test.py --help
```
-or refer to `example/librispeech/run_test.sh.
+or refer to `example/librispeech/run_test.sh`.

## Hyper-parameters Tuning

-The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It'd be better to re-tune them on validation samples after the accustic model is renewed.
+The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It would be better to re-tune them on a validation set when the acoustic model is renewed.

-`tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. We have to provide the range of $\alpha$ and $\beta$, as well as the number of attempts.
+`tools/tune.py` performs a 2-D grid search over the hyper-parameters $\alpha$ and $\beta$. We have to provide the range of $\alpha$ and $\beta$, as well as the number of their attempts. A rough sketch of what this grid search amounts to is given at the end of this section.

- Tuning with GPU:

    ```
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
    python tools/tune.py \
@@ -309,12 +311,12 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta
    python tools/tune.py --use_gpu False
    ```

-After tuning, we can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they can really improve the ASR performance.
+After tuning, we can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance.

```
python tune.py --help
```
-or refer to `example/librispeech/run_tune.sh.
+or refer to `example/librispeech/run_tune.sh`.

TODO: add figure.
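The following sketch only illustrates the shape of the 2-D grid search; the real logic lives in `tools/tune.py`, and `evaluate_wer` below is a hypothetical placeholder standing in for decoding the validation set with a given (alpha, beta) pair and returning its word error rate:

```
import itertools
import numpy as np

def evaluate_wer(alpha, beta):
    # Hypothetical stand-in: decode the validation set with this (alpha, beta)
    # and return its word error rate; here just a dummy surface with one minimum.
    return abs(alpha - 0.36) + abs(beta - 0.25)

# enumerate the same kind of grid that run_tune.sh configures:
# 14 alphas in [0.1, 0.36] and 20 betas in [0.05, 1.0]
alphas = np.linspace(0.1, 0.36, num=14)
betas = np.linspace(0.05, 1.0, num=20)

best_alpha, best_beta = min(itertools.product(alphas, betas),
                            key=lambda ab: evaluate_wer(*ab))
print("best (alpha, beta):", best_alpha, best_beta)
```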
@@ -352,4 +354,6 @@ It could be possible to start the server and the client in two seperate machines ## Experiments and Benchmarks +## Released Models + ## Questions and Help diff --git a/data/librispeech/eng_vocab.txt b/data/librispeech/eng_vocab.txt deleted file mode 100644 index 8268f3f33..000000000 --- a/data/librispeech/eng_vocab.txt +++ /dev/null @@ -1,28 +0,0 @@ -' - -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index 14a3804e2..e2ad8d413 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -19,8 +19,6 @@ import json import codecs from paddle.v2.dataset.common import md5file -DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') - URL_ROOT = "http://www.openslr.org/resources/12" URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz" @@ -41,7 +39,7 @@ MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--target_dir", - default=DATA_HOME + "/libri", + default='~/.cache/paddle/dataset/speech/libri', type=str, help="Directory to save the dataset. (default: %(default)s)") parser.add_argument( @@ -60,14 +58,14 @@ args = parser.parse_args() def download(url, md5sum, target_dir): - """ - Download file from url to target_dir, and check md5sum. + """Download file from url to target_dir, and check md5sum. """ if not os.path.exists(target_dir): os.makedirs(target_dir) filepath = os.path.join(target_dir, url.split("/")[-1]) if not (os.path.exists(filepath) and md5file(filepath) == md5sum): print("Downloading %s ..." % url) - os.system("wget -c " + url + " -P " + target_dir) + ret = os.system("wget -c " + url + " -P " + target_dir) + print(ret) print("\nMD5 Chesksum %s ..." % filepath) if not md5file(filepath) == md5sum: raise RuntimeError("MD5 checksum failed.") @@ -77,8 +75,7 @@ def download(url, md5sum, target_dir): def unpack(filepath, target_dir): - """ - Unpack the file to the target_dir. + """Unpack the file to the target_dir. """ print("Unpacking %s ..." % filepath) tar = tarfile.open(filepath) @@ -87,8 +84,7 @@ def unpack(filepath, target_dir): def create_manifest(data_dir, manifest_path): - """ - Create a manifest json file summarizing the data set, with each line + """Create a manifest json file summarizing the data set, with each line containing the meta data (i.e. audio filepath, transcription text, audio duration) of each audio file within the data set. """ @@ -119,8 +115,7 @@ def create_manifest(data_dir, manifest_path): def prepare_dataset(url, md5sum, target_dir, manifest_path): - """ - Download, unpack and create summmary manifest file. + """Download, unpack and create summmary manifest file. 
""" if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): # download @@ -135,6 +130,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path): def main(): + args.target_dir = os.path.expanduser(args.target_dir) + prepare_dataset( url=URL_TEST_CLEAN, md5sum=MD5_TEST_CLEAN, @@ -145,12 +142,12 @@ def main(): md5sum=MD5_DEV_CLEAN, target_dir=os.path.join(args.target_dir, "dev-clean"), manifest_path=args.manifest_prefix + ".dev-clean") - prepare_dataset( - url=URL_TRAIN_CLEAN_100, - md5sum=MD5_TRAIN_CLEAN_100, - target_dir=os.path.join(args.target_dir, "train-clean-100"), - manifest_path=args.manifest_prefix + ".train-clean-100") if args.full_download: + prepare_dataset( + url=URL_TRAIN_CLEAN_100, + md5sum=MD5_TRAIN_CLEAN_100, + target_dir=os.path.join(args.target_dir, "train-clean-100"), + manifest_path=args.manifest_prefix + ".train-clean-100") prepare_dataset( url=URL_TEST_OTHER, md5sum=MD5_TEST_OTHER, diff --git a/deploy/demo_server.py b/deploy/demo_server.py index 658b14197..2d3931f74 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -11,7 +11,7 @@ import wave import paddle.v2 as paddle import _init_paths from data_utils.data import DataGenerator -from models.model import DeepSpeech2Model +from model_utils.model import DeepSpeech2Model from data_utils.utils import read_manifest from utils.utility import add_arguments, print_arguments diff --git a/examples/librispeech/prepare_data.sh b/examples/librispeech/run_data.sh similarity index 57% rename from examples/librispeech/prepare_data.sh rename to examples/librispeech/run_data.sh index 6e9997703..f65aa233b 100644 --- a/examples/librispeech/prepare_data.sh +++ b/examples/librispeech/run_data.sh @@ -1,26 +1,31 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null # download data, generate manifests python data/librispeech/librispeech.py \ --manifest_prefix='data/librispeech/manifest' \ ---full_download='True' \ ---target_dir='~/.cache/paddle/dataset/speech/Libri' +--target_dir='~/.cache/paddle/dataset/speech/Libri' \ +--full_download='True' if [ $? -ne 0 ]; then echo "Prepare LibriSpeech failed. Terminated." exit 1 fi -cat data/librispeech/manifest.train* | shuf > data/librispeech/manifest.train +cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train -# build vocabulary (can be skipped for English, as already provided) -# python tools/build_vocab.py \ -# --count_threshold=0 \ -# --vocab_path='data/librispeech/eng_vocab.txt' \ -# --manifest_paths='data/librispeech/manifeset.train' +# build vocabulary +python tools/build_vocab.py \ +--count_threshold=0 \ +--vocab_path='data/librispeech/vocab.txt' \ +--manifest_paths='data/librispeech/manifest.train' + +if [ $? -ne 0 ]; then + echo "Build vocabulary failed. Terminated." + exit 1 +fi # compute mean and stddev for normalizer @@ -37,3 +42,4 @@ fi echo "LibriSpeech Data preparation done." +exit 0 diff --git a/examples/librispeech/run_infer.sh b/examples/librispeech/run_infer.sh index 619d546e8..6b790502a 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/librispeech/run_infer.sh @@ -1,13 +1,23 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? 
-ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# infer CUDA_VISIBLE_DEVICES=0 \ python -u infer.py \ --num_samples=10 \ --trainer_count=1 \ --beam_size=500 \ ---num_proc_bsearch=12 \ +--num_proc_bsearch=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ @@ -17,11 +27,19 @@ python -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/librispeech/manifest.dev-clean' \ +--infer_manifest='data/librispeech/manifest.test-clean' \ --mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---model_path='checkpoints/params.latest.tar.gz' \ ---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--vocab_path='data/librispeech/vocab.txt' \ +--model_path='checkpoints/libri/params.latest.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --error_rate_type='wer' \ --specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" + exit 1 +fi + + +exit 0 diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh new file mode 100644 index 000000000..32e9d8623 --- /dev/null +++ b/examples/librispeech/run_infer_golden.sh @@ -0,0 +1,54 @@ +#! /usr/bin/bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/librispeech > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# infer +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=500 \ +--num_proc_bsearch=8 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--infer_manifest='data/tiny/manifest.test-clean' \ +--mean_std_path='models/librispeech/mean_std.npz' \ +--vocab_path='models/librispeech/vocab.txt' \ +--model_path='models/librispeech/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" + exit 1 +fi + + +exit 0 diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index 5a14cb682..9709234ab 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -1,14 +1,24 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? 
-ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u evaluate.py \ +python -u test.py \ --batch_size=128 \ --trainer_count=8 \ --beam_size=500 \ ---num_proc_bsearch=12 \ ---num_proc_data=12 \ +--num_proc_bsearch=8 \ +--num_proc_data=4 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ @@ -20,9 +30,17 @@ python -u evaluate.py \ --share_rnn_weights=True \ --test_manifest='data/librispeech/manifest.test-clean' \ --mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---model_path='checkpoints/params.latest.tar.gz' \ ---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--vocab_path='data/librispeech/vocab.txt' \ +--model_path='checkpoints/libri/params.latest.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --error_rate_type='wer' \ --specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh new file mode 100644 index 000000000..080c3c062 --- /dev/null +++ b/examples/librispeech/run_test_golden.sh @@ -0,0 +1,55 @@ +#! /usr/bin/bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/librispeech > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# evaluate model +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u test.py \ +--batch_size=128 \ +--trainer_count=8 \ +--beam_size=500 \ +--num_proc_bsearch=8 \ +--num_proc_data=4 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--test_manifest='data/tiny/manifest.test-clean' \ +--mean_std_path='models/librispeech/mean_std.npz' \ +--vocab_path='models/librispeech/vocab.txt' \ +--model_path='models/librispeech/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 14672167c..5485475e9 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -1,10 +1,11 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# train model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u train.py \ ---batch_size=256 \ +--batch_size=512 \ --trainer_count=8 \ --num_passes=50 \ --num_proc_data=12 \ @@ -23,8 +24,16 @@ python -u train.py \ --train_manifest='data/librispeech/manifest.train' \ --dev_manifest='data/librispeech/manifest.dev' \ --mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---output_model_dir='./checkpoints' \ +--vocab_path='data/librispeech/vocab.txt' \ +--output_model_dir='./checkpoints/libri' \ --augment_conf_path='conf/augmentation.config' \ --specgram_type='linear' \ --shuffle_method='batch_shuffle_clipped' + +if [ $? -ne 0 ]; then + echo "Failed in training!" 
+ exit 1 +fi + + +exit 0 diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index 9d992e884..05c024bec 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -1,7 +1,8 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u tools/tune.py \ --num_samples=100 \ @@ -23,8 +24,16 @@ python -u tools/tune.py \ --share_rnn_weights=True \ --tune_manifest='data/librispeech/manifest.dev-clean' \ --mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---model_path='checkpoints/params.latest.tar.gz' \ ---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--vocab_path='data/librispeech/vocab.txt' \ +--model_path='checkpoints/libri/params.latest.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --error_rate_type='wer' \ --specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in tuning!" + exit 1 +fi + + +exit 0 diff --git a/examples/mandarin/run_demo_client.sh b/examples/mandarin/run_demo_client.sh new file mode 100644 index 000000000..dfde20f88 --- /dev/null +++ b/examples/mandarin/run_demo_client.sh @@ -0,0 +1,17 @@ +#! /usr/bin/bash + +pushd ../.. > /dev/null + +# start demo client +CUDA_VISIBLE_DEVICES=0 \ +python -u deploy/demo_client.py \ +--host_ip='localhost' \ +--host_port=8086 \ + +if [ $? -ne 0 ]; then + echo "Failed in starting demo client!" + exit 1 +fi + + +exit 0 diff --git a/examples/mandarin/run_demo_server.sh b/examples/mandarin/run_demo_server.sh new file mode 100644 index 000000000..703184a6b --- /dev/null +++ b/examples/mandarin/run_demo_server.sh @@ -0,0 +1,53 @@ +#! /usr/bin/bash +# TODO: replace the model with a mandarin model + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/librispeech > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# start demo server +CUDA_VISIBLE_DEVICES=0 \ +python -u deploy/demo_server.py \ +--host_ip='localhost' \ +--host_port=8086 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--speech_save_dir='demo_cache' \ +--warmup_manifest='data/tiny/manifest.test-clean' \ +--mean_std_path='models/librispeech/mean_std.npz' \ +--vocab_path='models/librispeech/vocab.txt' \ +--model_path='models/librispeech/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in starting demo server!" + exit 1 +fi + + +exit 0 diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh index 44345d8cc..203d3e2c8 100644 --- a/examples/tiny/run_data.sh +++ b/examples/tiny/run_data.sh @@ -1,27 +1,26 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null # download data, generate manifests -python data/tiny/tiny.py \ +python data/librispeech/librispeech.py \ --manifest_prefix='data/tiny/manifest' \ ---target_dir=$HOME'/.cache/paddle/dataset/speech/tiny' +--target_dir='~/.cache/paddle/dataset/speech/libri' \ +--full_download='False' if [ $? -ne 0 ]; then echo "Prepare LibriSpeech failed. Terminated." 
exit 1 fi -cat data/tiny/manifest.dev-clean | head -n 32 > data/tiny/manifest.train -cat data/tiny/manifest.dev-clean | head -n 48 | tail -n 16 > data/tiny/manifest.dev -cat data/tiny/manifest.dev-clean | head -n 64 | tail -n 16 > data/tiny/manifest.test +head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny # build vocabulary python tools/build_vocab.py \ --count_threshold=0 \ --vocab_path='data/tiny/vocab.txt' \ ---manifest_paths='data/tiny/manifest.train' +--manifest_paths='data/tiny/manifest.dev' if [ $? -ne 0 ]; then echo "Build vocabulary failed. Terminated." @@ -31,8 +30,8 @@ fi # compute mean and stddev for normalizer python tools/compute_mean_std.py \ ---manifest_path='data/tiny/manifest.train' \ ---num_samples=32 \ +--manifest_path='data/tiny/manifest.tiny' \ +--num_samples=64 \ --specgram_type='linear' \ --output_path='data/tiny/mean_std.npz' @@ -43,3 +42,4 @@ fi echo "Tiny data preparation done." +exit 0 diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index f09bc6638..1d33bfbba 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -1,13 +1,23 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# infer CUDA_VISIBLE_DEVICES=0 \ python -u infer.py \ ---num_samples=4 \ +--num_samples=10 \ --trainer_count=1 \ --beam_size=500 \ ---num_proc_bsearch=12 \ +--num_proc_bsearch=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ @@ -17,11 +27,19 @@ python -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/tiny/manifest.train' \ +--infer_manifest='data/tiny/manifest.tiny' \ --mean_std_path='data/tiny/mean_std.npz' \ --vocab_path='data/tiny/vocab.txt' \ ---model_path='checkpoints/params.pass-14.tar.gz' \ +--model_path='checkpoints/tiny/params.pass-19.tar.gz' \ --lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --error_rate_type='wer' \ --specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" + exit 1 +fi + + +exit 0 diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh new file mode 100644 index 000000000..32e9d8623 --- /dev/null +++ b/examples/tiny/run_infer_golden.sh @@ -0,0 +1,54 @@ +#! /usr/bin/bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/librispeech > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# infer +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=500 \ +--num_proc_bsearch=8 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--infer_manifest='data/tiny/manifest.test-clean' \ +--mean_std_path='models/librispeech/mean_std.npz' \ +--vocab_path='models/librispeech/vocab.txt' \ +--model_path='models/librispeech/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" 
+ exit 1 +fi + + +exit 0 diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index 5a14cb682..f9c3cc11c 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -1,14 +1,24 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u evaluate.py \ ---batch_size=128 \ +python -u test.py \ +--batch_size=16 \ --trainer_count=8 \ --beam_size=500 \ ---num_proc_bsearch=12 \ ---num_proc_data=12 \ +--num_proc_bsearch=8 \ +--num_proc_data=4 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ @@ -18,11 +28,19 @@ python -u evaluate.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/librispeech/manifest.test-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---model_path='checkpoints/params.latest.tar.gz' \ ---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--test_manifest='data/tiny/manifest.tiny' \ +--mean_std_path='data/tiny/mean_std.npz' \ +--vocab_path='data/tiny/vocab.txt' \ +--model_path='checkpoints/params.pass-19.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --error_rate_type='wer' \ --specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh new file mode 100644 index 000000000..080c3c062 --- /dev/null +++ b/examples/tiny/run_test_golden.sh @@ -0,0 +1,55 @@ +#! /usr/bin/bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/librispeech > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# evaluate model +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u test.py \ +--batch_size=128 \ +--trainer_count=8 \ +--beam_size=500 \ +--num_proc_bsearch=8 \ +--num_proc_data=4 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=2048 \ +--alpha=0.36 \ +--beta=0.25 \ +--cutoff_prob=0.99 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=True \ +--test_manifest='data/tiny/manifest.test-clean' \ +--mean_std_path='models/librispeech/mean_std.npz' \ +--vocab_path='models/librispeech/vocab.txt' \ +--model_path='models/librispeech/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index 7ca336876..c66ec4e56 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -1,18 +1,19 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. 
> /dev/null -CUDA_VISIBLE_DEVICES=0,1 \ +# train model +CUDA_VISIBLE_DEVICES=0,1,2,3 \ python -u train.py \ ---batch_size=2 \ ---trainer_count=1 \ ---num_passes=10 \ +--batch_size=16 \ +--trainer_count=4 \ +--num_passes=20 \ --num_proc_data=1 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ --num_iter_print=100 \ ---learning_rate=5e-5 \ +--learning_rate=1e-5 \ --max_duration=27.0 \ --min_duration=0.0 \ --use_sortagrad=True \ @@ -20,11 +21,19 @@ python -u train.py \ --use_gpu=True \ --is_local=True \ --share_rnn_weights=True \ ---train_manifest='data/tiny/manifest.train' \ ---dev_manifest='data/tiny/manifest.train' \ +--train_manifest='data/tiny/manifest.tiny' \ +--dev_manifest='data/tiny/manifest.tiny' \ --mean_std_path='data/tiny/mean_std.npz' \ --vocab_path='data/tiny/vocab.txt' \ ---output_model_dir='./checkpoints' \ +--output_model_dir='./checkpoints/tiny' \ --augment_conf_path='conf/augmentation.config' \ --specgram_type='linear' \ --shuffle_method='batch_shuffle_clipped' + +if [ $? -ne 0 ]; then + echo "Fail to do inference!" + exit 1 +fi + + +exit 0 diff --git a/examples/tiny/run_tune.sh b/examples/tiny/run_tune.sh index 9d992e884..360c11d59 100644 --- a/examples/tiny/run_tune.sh +++ b/examples/tiny/run_tune.sh @@ -1,7 +1,8 @@ #! /usr/bin/bash -pushd ../.. +pushd ../.. > /dev/null +# grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u tools/tune.py \ --num_samples=100 \ @@ -21,10 +22,18 @@ python -u tools/tune.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---tune_manifest='data/librispeech/manifest.dev-clean' \ ---mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ ---model_path='checkpoints/params.latest.tar.gz' \ ---lang_model_path='lm/data/common_crawl_00.prune01111.trie.klm' \ +--tune_manifest='data/tiny/manifest.tiny' \ +--mean_std_path='data/tiny/mean_std.npz' \ +--vocab_path='data/tiny/vocab.txt' \ +--model_path='checkpoints/params.pass-9.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --error_rate_type='wer' \ --specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in tuning!" + exit 1 +fi + + +exit 0 diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh new file mode 100644 index 000000000..4408f6c1c --- /dev/null +++ b/models/librispeech/download_model.sh @@ -0,0 +1,20 @@ +#! /usr/bin/bash + +source ../../utils/utility.sh + +# TODO: add urls +URL='to-be-added' +MD5=5b4af224b26c1dc4dd972b7d32f2f52a +TARGET=./librispeech_model.tar.gz + + +echo "Download LibriSpeech model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download LibriSpeech model!" + exit 1 +fi +tar -zxvf $TARGET + + +exit 0 diff --git a/models/lm/download_en.sh b/models/lm/download_en.sh deleted file mode 100644 index 5ca33c679..000000000 --- a/models/lm/download_en.sh +++ /dev/null @@ -1,16 +0,0 @@ -echo "Downloading language model ..." - -mkdir data - -LM=common_crawl_00.prune01111.trie.klm -MD5="099a601759d467cd0a8523ff939819c5" - -wget -c http://paddlepaddle.bj.bcebos.com/model_zoo/speech/$LM -P ./data - -echo "Checking md5sum ..." -md5_tmp=`md5sum ./data/$LM | awk -F[' '] '{print $1}'` - -if [ $MD5 != $md5_tmp ]; then - echo "Fail to download the language model!" - exit 1 -fi diff --git a/models/lm/download_lm_en.sh b/models/lm/download_lm_en.sh new file mode 100644 index 000000000..e967e25dc --- /dev/null +++ b/models/lm/download_lm_en.sh @@ -0,0 +1,18 @@ +#! 
/usr/bin/bash + +source ../../utils/utility.sh + +URL=http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm +MD5="099a601759d467cd0a8523ff939819c5" +TARGET=./common_crawl_00.prune01111.trie.klm + + +echo "Download language model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download the language model!" + exit 1 +fi + + +exit 0 diff --git a/utils/utility.sh b/utils/utility.sh new file mode 100644 index 000000000..4f617bfa9 --- /dev/null +++ b/utils/utility.sh @@ -0,0 +1,20 @@ +download() { + URL=$1 + MD5=$2 + TARGET=$3 + + if [ -e $TARGET ]; then + md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'` + if [ $MD5 == $md5_result ]; then + echo "$TARGET already exists, download skipped." + return 0 + fi + fi + + wget -c $URL -P `dirname "$TARGET"` + md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'` + if [ $MD5 == $md5_result ]; then + echo "Fail to download the language model!" + return 1 + fi +} From 87453365b2f24486e23763bd4baf0e31147de017 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 12 Sep 2017 14:12:14 +0800 Subject: [PATCH 184/335] Update REAME.md for DS2. --- .gitignore | 3 - README.md | 143 ++++++++++++++++++++++++++++---------- data/tiny/tiny.py | 126 --------------------------------- examples/tiny/run_data.sh | 6 ++ 4 files changed, 111 insertions(+), 167 deletions(-) delete mode 100644 .gitignore delete mode 100644 data/tiny/tiny.py diff --git a/.gitignore b/.gitignore deleted file mode 100644 index db0537f3b..000000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -manifest* -mean_std.npz -thirdparty/ diff --git a/README.md b/README.md index aae0dc6d8..afa6dd512 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ sh setup.sh ## Getting Started -Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference, model evaluation and demo deployment, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](https://github.com/kaldi-asr/kaldi/tree/master/egs/aishell)). Reading these examples will also help us understand how to make it work with our own data. +Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference and model evaluation, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](https://github.com/kaldi-asr/kaldi/tree/master/egs/aishell)). Reading these examples will also help us understand how to make it work with our own data. Some of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICE` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False to use CPUs instead. @@ -83,27 +83,6 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org ``` sh run_test_golden.sh ``` -- Try out a live demo with your own voice - - Until now, we have trained and tested our ASR model qualitatively (`run_infer.sh`) and quantitively (`run_test.sh`) with existing audio files. But we have not yet play the model with our own speech. `demo_server.sh` and `demo_client.sh` helps quickly build up a demo ASR engine with the trained model, enabling us to test and play around with the demo with our own voice. 
- - We start the server in one console by entering: - - ``` - sh run_demo_server.sh - ``` - - and start the client in another console by entering: - - ``` - sh run_demo_client.sh - ``` - - Then, in the client console, press the `whitespace` key, hold, and start speaking. Until we finish our ulterance, we release the key to let the speech-to-text results show in the console. - - Notice that `run_demo_client.sh` must be run in a machine with a microphone device, while `run_demo_server.sh` could be run in one without any audio recording device, e.g. any remote server. Just be careful to update `run_demo_server.sh` and `run_demo_client.sh` with the actual accessable IP address and port, if the server and client are running with two seperate machines. Nothing has to be done if running in one single machine. - - This demo will first download a pre-trained Mandarin model (trained with 3000 hours of internal speech data). If we would like to try some other model, just update `model_path` argument in the script.       More detailed information are provided in the following sections. @@ -112,7 +91,7 @@ Wish you a happy journey with the DeepSpeech2 ASR engine! ## Data Preparation -#### Generate Manifest +### Generate Manifest *DeepSpeech2 on PaddlePaddle* accepts a textual **manifest** file as its data set interface. A manifest file summarizes a set of speech data, with each line containing some meta data (e.g. filepath, transcription, duration) of one audio clip, in [JSON](http://www.json.org/) format, such as: @@ -125,7 +104,7 @@ To use your custom data, you only need to generate such manifest files to summar For how to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which download and generate manifests for LibriSpeech dataset. -#### Compute Mean & Stddev for Normalizer +### Compute Mean & Stddev for Normalizer To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with some training samples: @@ -139,8 +118,7 @@ python tools/compute_mean_std.py \ It will compute the mean and standard deviation of power spectgram feature with 2000 random sampled audio clips listed in `data/librispeech/manifest.train` and save the results to `data/librispeech/mean_std.npz` for further usage. - -#### Build Vocabulary +### Build Vocabulary A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in docoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be build with `tools/build_vocab.py`. @@ -153,7 +131,7 @@ python tools/build_vocab.py \ It will write a vocabuary file `data/librispeeech/eng_vocab.txt` with all transcription text in `data/librispeech/manifest.train`, without vocabulary truncation (`--count_threshold 0`). 
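To make the vocabulary-building step above more concrete, here is a minimal, self-contained sketch of how a character vocabulary can be derived from the manifest files described earlier. It is not the actual `tools/build_vocab.py`; the function name and the paths in the usage comment are placeholders.

```python
import codecs
import json
from collections import Counter

def build_char_vocab(manifest_paths, vocab_path, count_threshold=0):
    """Count characters over manifest transcripts and write one token per line."""
    counter = Counter()
    for manifest_path in manifest_paths:
        for json_line in codecs.open(manifest_path, 'r', 'utf-8'):
            entry = json.loads(json_line)   # {"audio_filepath": ..., "duration": ..., "text": ...}
            counter.update(entry["text"])   # character-level counts
    with codecs.open(vocab_path, 'w', 'utf-8') as out_file:
        for token, count in sorted(counter.items()):
            if count > count_threshold:
                out_file.write(token + '\n')

# Usage with placeholder paths:
# build_char_vocab(['data/librispeech/manifest.train'], 'data/librispeech/eng_vocab.txt')
```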
-#### More Help +### More Help For more help on arguments: @@ -181,7 +159,8 @@ python tools/build_vocab.py --help - Resume training from a checkpoint: ``` - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py \ + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ + python train.py \ --init_model_path CHECKPOINT_PATH_TO_RESUME_FROM ``` @@ -295,7 +274,8 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta - Tuning with GPU: ``` - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python tools/tune.py \ + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ + python tools/tune.py \ --trainer_count 8 \ --alpha_from 0.1 \ --alpha_to 0.36 \ @@ -322,14 +302,86 @@ TODO: add figure. ## Distributed Cloud Training -If you wish to train DeepSpeech2 on PaddleCloud, please refer to +We provide a cloud training module for users to do the distributed cluster training on [PaddleCloud](https://github.com/PaddlePaddle/cloud), to achieve a much faster training speed with multiple machines. To start with this, please first install PaddleCloud client and register a PaddleCloud account, as described in [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud). + +Then, we take the following steps to sumbit a training job: + +- go to directory: + + ``` + cd cloud + ``` +- Upload data: + + Data must be uploaded to PaddleCloud filesystem to be accessed from a cloud job. `pcloud_upload_data.sh` helps do the data packing and uploading: + + ``` + sh pcloud_upload_data.sh + ``` + + Given input manifests, `pcloud_upload_data.sh` will: + + - Extract the audio files listed in the input manifests. + - Pack them into a specified number of tar files. + - Upload these tar files to PaddleCloud filesystem. + - Create cloud manifests by replacing local filesystem paths with PaddleCloud filesystem paths. New manifests will be used to inform the cloud jobs of audio files' location and their meta information. + + It has to be done only once for the very first time we do the cloud training. Later on, the data is persisitent on the cloud filesystem and reusable for further job submissions. + + For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). + + - Configure training arguments: + + Configure the cloud job parameters in `pcloud_submit.sh` (e.g. `NUM_NODES`, `NUM_GPUS`, `CLOUD_TRAIN_DIR`, `JOB_NAME` etc.) and then configure other hyper-parameters for training in `pcloud_train.sh` (just as what you do for local training). + + For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). + + - Submit the job: + + By running: + + ``` + sh pcloud_submit.sh + ``` + we submit a training job to PaddleCloud. And we will see the job name when the submission is finished. Now our training job is running well on the PaddleCloud. + + - Get training logs + + Run this to list all the jobs you have submitted, as well as their running status: + + ``` + paddlecloud get jobs + ``` + + Run this, the corresponding job's logs will be printed. + ``` + paddlecloud logs -n 10000 $REPLACED_WITH_YOUR_ACTUAL_JOB_NAME + ``` + +For more information about the usage of PaddleCloud, please refer to [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务). 
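As a side note on the data-uploading step described above, the "create cloud manifests" part boils down to rewriting the local audio paths in a manifest into their PaddleCloud filesystem counterparts. The sketch below illustrates only that idea; it is not what `pcloud_upload_data.sh` actually runs, and both path prefixes in the usage comment are made-up placeholders.

```python
import json

def create_cloud_manifest(local_manifest, cloud_manifest, local_prefix, cloud_prefix):
    """Rewrite local audio paths so that a cloud job can locate the uploaded files."""
    with open(local_manifest) as src, open(cloud_manifest, 'w') as dst:
        for json_line in src:
            entry = json.loads(json_line)
            entry["audio_filepath"] = entry["audio_filepath"].replace(
                local_prefix, cloud_prefix, 1)
            dst.write(json.dumps(entry) + '\n')

# Usage with made-up prefixes:
# create_cloud_manifest('data/librispeech/manifest.train', 'cloud/cloud.manifest.train',
#                       '/root/.cache/paddle/dataset/speech', '/pfs/speech')
```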
+ +For more information about the DeepSpeech2 training on PaddleCloud, please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). ## Training for Mandarin Language +TODO: to be added + ## Trying Live Demo with Your Own Voice -A real-time ASR demo is built for users to try out the ASR model with their own voice. Please do the following installation on the machine you'd like to run the demo's client (no need for the machine running the demo's server). +Until now, we have trained and tested our ASR model qualitatively (`infer.py`) and quantitively (`test.py`) with existing audio files. But we have not yet play the model with our own speech. `deploy/demo_server.py` and `deploy/demo_client.py` helps quickly build up a real-time demo ASR engine with the trained model, enabling us to test and play around with the demo, with our own voice. + +We start the demo's server in one console by: + +``` +CUDA_VISIBLE_DEVICES=0 \ +python deploy/demo_server.py \ +--trainer_count 1 \ +--host_ip localhost \ +--host_port 8086 +``` + +For the machine (might be the same or a different machine) to run the demo's client, we have to do the following installation before moving on. For example, on MAC OS X: @@ -338,22 +390,37 @@ brew install portaudio pip install pyaudio pip install pynput ``` -After a model and language model is prepared, we can first start the demo's server: + +Then we can start the client in another console by: ``` -CUDA_VISIBLE_DEVICES=0 python demo_server.py +CUDA_VISIBLE_DEVICES=0 \ +python -u deploy/demo_client.py \ +--host_ip 'localhost' \ +--host_port 8086 \ ``` -And then in another console, start the demo's client: + +Next, in the client console, press the `whitespace` key, hold, and start speaking. Until we finish our ulterance, we release the key to let the speech-to-text results shown in the console. To quit the client, just press `ESC` key. + +Notice that `deploy/demo_client.py` must be run in a machine with a microphone device, while `deploy/demo_server.py` could be run in one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessable IP address and port, if the server and client are running with two seperate machines. Nothing has to be done if they are running in one single machine. + +We can also refer to `examples/mandarin/run_demo_server.sh` for example, which will first download a pre-trained Mandarin model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/mandarin/run_demo_client.sh`, we can speak Mandarin to test it. If we would like to try some other models, just update `--model_path` argument in the script.   + +For more help on arguments: ``` -python demo_client.py +python deploy/demo_server.py --help +python deploy/demo_client.py --help ``` -On the client console, press and hold the "white-space" key on the keyboard to start talking, until you finish your speech and then release the "white-space" key. The decoding results (infered transcription) will be displayed. - -It could be possible to start the server and the client in two seperate machines, e.g. `demo_client.py` is usually started in a machine with a microphone hardware, while `demo_server.py` is usually started in a remote server with powerful GPUs. 
Please first make sure that these two machines have network access to each other, and then use `--host_ip` and `--host_port` to indicate the server machine's actual IP address (instead of the `localhost` as default) and TCP port, in both `demo_server.py` and `demo_client.py`. ## Experiments and Benchmarks +TODO: to be added + ## Released Models +TODO: to be added + ## Questions and Help + +You are welcome to submit questions and bug reports in [Github Issues](https://github.com/PaddlePaddle/models/issues). You are also welcome to contribute to this project. diff --git a/data/tiny/tiny.py b/data/tiny/tiny.py deleted file mode 100644 index 8ba2a13c5..000000000 --- a/data/tiny/tiny.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Prepare Librispeech ASR datasets. - -Download, unpack and create manifest files. -Manifest file is a json-format file with each line containing the -meta data (i.e. audio filepath, transcript and audio duration) -of each audio file in the data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import distutils.util -import os -import sys -import tarfile -import argparse -import soundfile -import json -import codecs -from paddle.v2.dataset.common import md5file - -DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') - -URL_ROOT = "http://www.openslr.org/resources/12" -URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz" -MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1" - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - "--target_dir", - default=DATA_HOME + "/tiny", - type=str, - help="Directory to save the dataset. (default: %(default)s)") -parser.add_argument( - "--manifest_prefix", - default="manifest", - type=str, - help="Filepath prefix for output manifests. (default: %(default)s)") -args = parser.parse_args() - - -def download(url, md5sum, target_dir): - """ - Download file from url to target_dir, and check md5sum. - """ - if not os.path.exists(target_dir): os.makedirs(target_dir) - filepath = os.path.join(target_dir, url.split("/")[-1]) - if not (os.path.exists(filepath) and md5file(filepath) == md5sum): - print("Downloading %s ..." % url) - os.system("wget -c " + url + " -P " + target_dir) - print("\nMD5 Chesksum %s ..." % filepath) - if not md5file(filepath) == md5sum: - raise RuntimeError("MD5 checksum failed.") - else: - print("File exists, skip downloading. (%s)" % filepath) - return filepath - - -def unpack(filepath, target_dir): - """ - Unpack the file to the target_dir. - """ - print("Unpacking %s ..." % filepath) - tar = tarfile.open(filepath) - tar.extractall(target_dir) - tar.close() - - -def create_manifest(data_dir, manifest_path): - """ - Create a manifest json file summarizing the data set, with each line - containing the meta data (i.e. audio filepath, transcription text, audio - duration) of each audio file within the data set. - """ - print("Creating manifest %s ..." 
% manifest_path) - json_lines = [] - for subfolder, _, filelist in sorted(os.walk(data_dir)): - text_filelist = [ - filename for filename in filelist if filename.endswith('trans.txt') - ] - if len(text_filelist) > 0: - text_filepath = os.path.join(data_dir, subfolder, text_filelist[0]) - for line in open(text_filepath): - segments = line.strip().split() - text = ' '.join(segments[1:]).lower() - audio_filepath = os.path.join(data_dir, subfolder, - segments[0] + '.flac') - audio_data, samplerate = soundfile.read(audio_filepath) - duration = float(len(audio_data)) / samplerate - json_lines.append( - json.dumps({ - 'audio_filepath': audio_filepath, - 'duration': duration, - 'text': text - })) - with codecs.open(manifest_path, 'w', 'utf-8') as out_file: - for line in json_lines: - out_file.write(line + '\n') - - -def prepare_dataset(url, md5sum, target_dir, manifest_path): - """ - Download, unpack and create summmary manifest file. - """ - if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): - # download - filepath = download(url, md5sum, target_dir) - # unpack - unpack(filepath, target_dir) - else: - print("Skip downloading and unpacking. Data already exists in %s." % - target_dir) - # create manifest json file - create_manifest(target_dir, manifest_path) - - -def main(): - prepare_dataset( - url=URL_DEV_CLEAN, - md5sum=MD5_DEV_CLEAN, - target_dir=os.path.join(args.target_dir, "dev-clean"), - manifest_path=args.manifest_prefix + ".dev-clean") - - -if __name__ == '__main__': - main() diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh index 203d3e2c8..46266daaf 100644 --- a/examples/tiny/run_data.sh +++ b/examples/tiny/run_data.sh @@ -2,6 +2,12 @@ pushd ../.. > /dev/null +# prepare folder +if [ ! -e data/tiny ]; then + mkdir data/tiny +fi + + # download data, generate manifests python data/librispeech/librispeech.py \ --manifest_prefix='data/tiny/manifest' \ From 4969d297d8002de0c15d32342664cb5c756f628a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 12 Sep 2017 14:42:15 +0800 Subject: [PATCH 185/335] Correct typos for DS2 README.md. --- README.md | 64 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index afa6dd512..7c176d8bb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # DeepSpeech2 on PaddlePaddle -*DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech-to-text, via an easy-to-use, efficent and scalable integreted implementation, including training, inferencing & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. +*DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. 
Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient and scalable implementation, including training, inferencing & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. ## Table of Contents - [Prerequisites](#prerequisites) @@ -53,14 +53,14 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org sh run_data.sh ``` - `run_data.sh` will download dataset, generate manifests, collect normalizer' statitics and build vocabulary. Once the data preparation is done, we will find the data (only part of LibriSpeech) downloaded in `~/.cache/paddle/dataset/speech/libri` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time we run this dataset and is reusable for all further experiments. + `run_data.sh` will download dataset, generate manifests, collect normalizer' statistics and build vocabulary. Once the data preparation is done, we will find the data (only part of LibriSpeech) downloaded in `~/.cache/paddle/dataset/speech/libri` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time we run this dataset and is reusable for all further experiments. - Train your own ASR model ``` sh run_train.sh ``` - `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. We can resume the training from these checkpoints, or use them for inference, evalutiaton and deployment. + `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. We can resume the training from these checkpoints, or use them for inference, evaluation and deployment. - Case inference with an existing model ``` @@ -83,10 +83,8 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org ``` sh run_test_golden.sh ``` -     -More detailed information are provided in the following sections. -Wish you a happy journey with the DeepSpeech2 ASR engine! +More detailed information are provided in the following sections. Wish you a happy journey with the *DeepSpeech2 on PaddlePaddle* ASR engine! ## Data Preparation @@ -116,11 +114,12 @@ python tools/compute_mean_std.py \ --output_path data/librispeech/mean_std.npz ``` -It will compute the mean and standard deviation of power spectgram feature with 2000 random sampled audio clips listed in `data/librispeech/manifest.train` and save the results to `data/librispeech/mean_std.npz` for further usage. +It will compute the mean and standard deviation of power spectrum feature with 2000 random sampled audio clips listed in `data/librispeech/manifest.train` and save the results to `data/librispeech/mean_std.npz` for further usage. + ### Build Vocabulary -A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in docoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be build with `tools/build_vocab.py`. 
+A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in decoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be built with `tools/build_vocab.py`. ``` python tools/build_vocab.py \ @@ -173,14 +172,14 @@ or refer to `example/librispeech/run_train.sh`. ## Data Augmentation Pipeline -Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perterbation (label-invariant transformation) added upon raw audios. We don't have to do the syntheses by ourselves, as it is already embeded into the data provider and is done on the fly, randomly for each epoch during training. +Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perturbation (label-invariant transformation) added upon raw audios. We don't have to do the syntheses by ourselves, as it is already embedded into the data provider and is done on the fly, randomly for each epoch during training. Six optional augmentation components are provided for us to configured and inserted into the processing pipeline. - Volume Perturbation - Speed Perturbation - Shifting Perturbation - - Online Beyesian normalization + - Online Bayesian normalization - Noise Perturbation (need background noise audio files) - Impulse Response (need impulse audio files) @@ -211,13 +210,20 @@ Be careful when we are utilizing the data augmentation technique, as improper au ### Prepare Language Model -A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. Please refer to `models/lm/download_lm_en.sh` and `models/lm/download_lm_zh.sh` for their urls. If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. +A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. We can simply run this to download the preprared language models: + +``` +cd models/lm +sh download_lm_en.sh +sh download_lm_ch.sh +``` +If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. TODO: any other requirements or tips to add? ### Speech-to-text Inference -We provide a inference module `infer.py` to infer, decode and visualize speech-to-text results for several given audio clips. It might help us to have a intuitive and qualitative evaluation of the ASR model's performance. +An inference module caller `infer.py` is provided for us to infer, decode and visualize speech-to-text results for several given audio clips. It might help to have an intuitive and qualitative evaluation of the ASR model's performance. - Inference with GPU: @@ -225,13 +231,13 @@ We provide a inference module `infer.py` to infer, decode and visualize speech-t CUDA_VISIBLE_DEVICES=0 python infer.py --trainer_count 1 ``` -- Inference with CPU: +- Inference with CPUs: ``` python infer.py --use_gpu False --trainer_count 12 ``` -We provide two types of CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. 
The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilizes a heuristic breadth-first gragh search for reaching a near global optimality; it also requires a pre-trained KenLM language model for better scoring and ranking. The decoder type can be set with argument `--decoding_method`. +We provide two types of CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilizes a heuristic breadth-first graph search for reaching a near global optimality; it also requires a pre-trained KenLM language model for better scoring and ranking. The decoder type can be set with argument `--decoding_method`. For more help on arguments: @@ -242,15 +248,15 @@ or refer to `example/librispeech/run_infer.sh`. ### Evaluate a Model -To evaluate a model's performance quantitively, we can run: +To evaluate a model's performance quantitatively, we can run: -- Evaluation with GPU: +- Evaluation with GPUs: ``` CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python test.py --trainer_count 8 ``` -- Evaluation with CPU: +- Evaluation with CPUs: ``` python test.py --use_gpu False --trainer_count 12 @@ -267,9 +273,9 @@ or refer to `example/librispeech/run_test.sh`. ## Hyper-parameters Tuning -The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It would be better to re-tune them on a validation set when the accustic model is renewed. +The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It would be better to re-tune them on a validation set when the acoustic model is renewed. -`tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. We have to provide the range of $\alpha$ and $\beta$, as well as the number of their attempts. +`tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. We must provide the range of $\alpha$ and $\beta$, as well as the number of their attempts. - Tuning with GPU: @@ -304,16 +310,16 @@ TODO: add figure. We provide a cloud training module for users to do the distributed cluster training on [PaddleCloud](https://github.com/PaddlePaddle/cloud), to achieve a much faster training speed with multiple machines. To start with this, please first install PaddleCloud client and register a PaddleCloud account, as described in [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud). -Then, we take the following steps to sumbit a training job: +Then, we take the following steps to submit a training job: -- go to directory: +- Go to directory: ``` cd cloud ``` - Upload data: - Data must be uploaded to PaddleCloud filesystem to be accessed from a cloud job. 
`pcloud_upload_data.sh` helps do the data packing and uploading: + Data must be uploaded to PaddleCloud filesystem to be accessed within a cloud job. `pcloud_upload_data.sh` helps do the data packing and uploading: ``` sh pcloud_upload_data.sh @@ -326,7 +332,7 @@ Then, we take the following steps to sumbit a training job: - Upload these tar files to PaddleCloud filesystem. - Create cloud manifests by replacing local filesystem paths with PaddleCloud filesystem paths. New manifests will be used to inform the cloud jobs of audio files' location and their meta information. - It has to be done only once for the very first time we do the cloud training. Later on, the data is persisitent on the cloud filesystem and reusable for further job submissions. + It should be done only once for the very first time we do the cloud training. Later, the data is kept persisitent on the cloud filesystem and reusable for further job submissions. For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). @@ -343,7 +349,7 @@ Then, we take the following steps to sumbit a training job: ``` sh pcloud_submit.sh ``` - we submit a training job to PaddleCloud. And we will see the job name when the submission is finished. Now our training job is running well on the PaddleCloud. + we submit a training job to PaddleCloud. And the job name will be printed when the submission is finished. Now our training job is running well on the PaddleCloud. - Get training logs @@ -369,7 +375,7 @@ TODO: to be added ## Trying Live Demo with Your Own Voice -Until now, we have trained and tested our ASR model qualitatively (`infer.py`) and quantitively (`test.py`) with existing audio files. But we have not yet play the model with our own speech. `deploy/demo_server.py` and `deploy/demo_client.py` helps quickly build up a real-time demo ASR engine with the trained model, enabling us to test and play around with the demo, with our own voice. +Until now, we have trained and tested our ASR model qualitatively (`infer.py`) and quantitatively (`test.py`) with existing audio files. But we have not yet try the model with our own speech. `deploy/demo_server.py` and `deploy/demo_client.py` helps quickly build up a real-time demo ASR engine with the trained model, enabling us to test and play around with the demo, with our own voice. We start the demo's server in one console by: @@ -381,7 +387,7 @@ python deploy/demo_server.py \ --host_port 8086 ``` -For the machine (might be the same or a different machine) to run the demo's client, we have to do the following installation before moving on. +For the machine (might not be the same machine) to run the demo's client, we have to do the following installation before moving on. For example, on MAC OS X: @@ -397,12 +403,12 @@ Then we can start the client in another console by: CUDA_VISIBLE_DEVICES=0 \ python -u deploy/demo_client.py \ --host_ip 'localhost' \ ---host_port 8086 \ +--host_port 8086 ``` -Next, in the client console, press the `whitespace` key, hold, and start speaking. Until we finish our ulterance, we release the key to let the speech-to-text results shown in the console. To quit the client, just press `ESC` key. +Now, in the client console, press the `whitespace` key, hold, and start speaking. Until we finish our utterance, we release the key to let the speech-to-text results shown in the console. To quit the client, just press `ESC` key. 
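For the curious, the push-to-talk interaction just described can be prototyped with the same `pyaudio` and `pynput` packages installed above. The sketch below only records audio while the space key is held and stops on `ESC`; sending the recorded bytes to the demo server is deliberately left out, since that part belongs to `deploy/demo_client.py` itself.

```python
import pyaudio
from pynput import keyboard

CHUNK, RATE = 1024, 16000
audio = pyaudio.PyAudio()
stream = audio.open(format=pyaudio.paInt16, channels=1, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)
recording = False
frames = []

def on_press(key):
    global recording, frames
    if key == keyboard.Key.space and not recording:
        recording, frames = True, []          # start a new utterance

def on_release(key):
    global recording
    if key == keyboard.Key.space:
        recording = False
        utterance = b''.join(frames)          # hand this off to the recognizer
        print("Recorded %d bytes" % len(utterance))
    elif key == keyboard.Key.esc:
        return False                          # stops the keyboard listener

listener = keyboard.Listener(on_press=on_press, on_release=on_release)
listener.start()
while listener.running:                       # main thread keeps pulling audio
    chunk = stream.read(CHUNK)
    if recording:
        frames.append(chunk)
stream.stop_stream()
stream.close()
audio.terminate()
```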
-Notice that `deploy/demo_client.py` must be run in a machine with a microphone device, while `deploy/demo_server.py` could be run in one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessable IP address and port, if the server and client are running with two seperate machines. Nothing has to be done if they are running in one single machine. +Notice that `deploy/demo_client.py` must be run in a machine with a microphone device, while `deploy/demo_server.py` could be run in one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessible IP address and port, if the server and client are running with two separate machines. Nothing should be done if they are running in one single machine. We can also refer to `examples/mandarin/run_demo_server.sh` for example, which will first download a pre-trained Mandarin model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/mandarin/run_demo_client.sh`, we can speak Mandarin to test it. If we would like to try some other models, just update `--model_path` argument in the script.   From 35caf5e0b744171634fbc2ea914e6f85a281718a Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 12 Sep 2017 23:46:50 +0800 Subject: [PATCH 186/335] Add bash code highlight to README.md for DS2. --- README.md | 60 ++++++++++++++++----------------- data/librispeech/librispeech.py | 1 - 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 7c176d8bb..d9b989342 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Please install the [prerequisites](#prerequisites) above before moving on. -``` +```bash git clone https://github.com/PaddlePaddle/models.git cd models/deep_speech_2 sh setup.sh @@ -42,45 +42,45 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org - Go to directory - ``` + ```bash cd examples/tiny ``` Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If we would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead. - Prepare the data - ``` + ```bash sh run_data.sh ``` `run_data.sh` will download dataset, generate manifests, collect normalizer' statistics and build vocabulary. Once the data preparation is done, we will find the data (only part of LibriSpeech) downloaded in `~/.cache/paddle/dataset/speech/libri` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time we run this dataset and is reusable for all further experiments. - Train your own ASR model - ``` + ```bash sh run_train.sh ``` `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. We can resume the training from these checkpoints, or use them for inference, evaluation and deployment. - Case inference with an existing model - ``` + ```bash sh run_infer.sh ``` `run_infer.sh` will show us some speech-to-text decoding results for several (default: 10) samples with the trained model. The performance might not be good now as the current model is only trained with a toy subset of LibriSpeech. 
To see the results with a better model, we can download a well-trained (trained for several days, with the complete LibriSpeech) model and do the inference: - ``` + ```bash sh run_infer_golden.sh ``` - Evaluate an existing model - ``` + ```bash sh run_test.sh ``` `run_test.sh` will evaluate the model with Word Error Rate (or Character Error Rate) measurement. Similarly, we can also download a well-trained model and test its performance: - ``` + ```bash sh run_test_golden.sh ``` @@ -106,7 +106,7 @@ For how to generate such manifest files, please refer to `data/librispeech/libri To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with some training samples: -``` +```bash python tools/compute_mean_std.py \ --num_samples 2000 \ --specgram_type linear \ @@ -121,7 +121,7 @@ It will compute the mean and standard deviation of power spectrum feature with 2 A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in decoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be built with `tools/build_vocab.py`. -``` +```bash python tools/build_vocab.py \ --count_threshold 0 \ --vocab_path data/librispeech/eng_vocab.txt \ @@ -134,7 +134,7 @@ It will write a vocabuary file `data/librispeeech/eng_vocab.txt` with all transc For more help on arguments: -``` +```bash python data/librispeech/librispeech.py --help python tools/compute_mean_std.py --help python tools/build_vocab.py --help @@ -165,7 +165,7 @@ python tools/build_vocab.py --help For more help on arguments: -``` +```bash python train.py --help ``` or refer to `example/librispeech/run_train.sh`. @@ -212,7 +212,7 @@ Be careful when we are utilizing the data augmentation technique, as improper au A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. We can simply run this to download the preprared language models: -``` +```bash cd models/lm sh download_lm_en.sh sh download_lm_ch.sh @@ -227,13 +227,13 @@ An inference module caller `infer.py` is provided for us to infer, decode and vi - Inference with GPU: - ``` + ```bash CUDA_VISIBLE_DEVICES=0 python infer.py --trainer_count 1 ``` - Inference with CPUs: - ``` + ```bash python infer.py --use_gpu False --trainer_count 12 ``` @@ -252,13 +252,13 @@ To evaluate a model's performance quantitatively, we can run: - Evaluation with GPUs: - ``` + ```bash CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python test.py --trainer_count 8 ``` - Evaluation with CPUs: - ``` + ```bash python test.py --use_gpu False --trainer_count 12 ``` @@ -266,7 +266,7 @@ The error rate (default: word error rate; can be set with `--error_rate_type`) w For more help on arguments: -``` +```bash python test.py --help ``` or refer to `example/librispeech/run_test.sh`. 
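As a reminder of what the reported error rate means, below is a small, self-contained sketch of the standard word error rate computation based on edit distance. The project's own error-rate utilities may differ in implementation details; swapping `split()` for character iteration gives CER instead.

```python
def word_error_rate(reference, hypothesis):
    """WER = (substitutions + deletions + insertions) / number of reference words."""
    ref, hyp = reference.split(), hypothesis.split()
    # dp[i][j] is the edit distance between the first i reference words
    # and the first j hypothesis words.
    dp = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        dp[i][0] = i
    for j in range(len(hyp) + 1):
        dp[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                           dp[i][j - 1] + 1,         # insertion
                           dp[i - 1][j - 1] + cost)  # substitution or match
    return float(dp[len(ref)][len(hyp)]) / max(len(ref), 1)

print(word_error_rate("the quick brown fox", "the quick brown box"))  # 0.25
```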
@@ -279,7 +279,7 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta - Tuning with GPU: - ``` + ```bash CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python tools/tune.py \ --trainer_count 8 \ @@ -293,13 +293,13 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta - Tuning with CPU: - ``` + ```bash python tools/tune.py --use_gpu False ``` After tuning, we can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance. -``` +```bash python tune.py --help ``` or refer to `example/librispeech/run_tune.sh`. @@ -314,14 +314,14 @@ Then, we take the following steps to submit a training job: - Go to directory: - ``` + ```bash cd cloud ``` - Upload data: Data must be uploaded to PaddleCloud filesystem to be accessed within a cloud job. `pcloud_upload_data.sh` helps do the data packing and uploading: - ``` + ```bash sh pcloud_upload_data.sh ``` @@ -346,7 +346,7 @@ Then, we take the following steps to submit a training job: By running: - ``` + ```bash sh pcloud_submit.sh ``` we submit a training job to PaddleCloud. And the job name will be printed when the submission is finished. Now our training job is running well on the PaddleCloud. @@ -355,12 +355,12 @@ Then, we take the following steps to submit a training job: Run this to list all the jobs you have submitted, as well as their running status: - ``` + ```bash paddlecloud get jobs ``` Run this, the corresponding job's logs will be printed. - ``` + ```bash paddlecloud logs -n 10000 $REPLACED_WITH_YOUR_ACTUAL_JOB_NAME ``` @@ -379,7 +379,7 @@ Until now, we have trained and tested our ASR model qualitatively (`infer.py`) a We start the demo's server in one console by: -``` +```bash CUDA_VISIBLE_DEVICES=0 \ python deploy/demo_server.py \ --trainer_count 1 \ @@ -391,7 +391,7 @@ For the machine (might not be the same machine) to run the demo's client, we hav For example, on MAC OS X: -``` +```bash brew install portaudio pip install pyaudio pip install pynput @@ -399,7 +399,7 @@ pip install pynput Then we can start the client in another console by: -``` +```bash CUDA_VISIBLE_DEVICES=0 \ python -u deploy/demo_client.py \ --host_ip 'localhost' \ @@ -414,7 +414,7 @@ We can also refer to `examples/mandarin/run_demo_server.sh` for example, which w For more help on arguments: -``` +```bash python deploy/demo_server.py --help python deploy/demo_client.py --help ``` diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index e2ad8d413..0709136e2 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -65,7 +65,6 @@ def download(url, md5sum, target_dir): if not (os.path.exists(filepath) and md5file(filepath) == md5sum): print("Downloading %s ..." % url) ret = os.system("wget -c " + url + " -P " + target_dir) - print(ret) print("\nMD5 Chesksum %s ..." % filepath) if not md5file(filepath) == md5sum: raise RuntimeError("MD5 checksum failed.") From ac56a2f249a853653e1d1fe7b173475c67c90a91 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 13 Sep 2017 15:36:34 +0800 Subject: [PATCH 187/335] Update READMD.md and other details by following reviewers comments. 
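Relating to the hyper-parameter tuning section above: `tools/tune.py` essentially sweeps $\alpha$ and $\beta$ over evenly spaced values and keeps the pair with the lowest validation error rate. Here is a minimal sketch of that grid-search idea, reusing the ranges from the example command; the `evaluate` callback is a placeholder for whatever decodes the tuning manifest and returns an error rate, not part of the real tool.

```python
import numpy as np

def grid_search(evaluate, alpha_from=0.1, alpha_to=0.36, num_alphas=14,
                beta_from=0.05, beta_to=1.0, num_betas=20):
    """Return the (alpha, beta) pair with the lowest validation error rate."""
    best_alpha, best_beta, best_err = None, None, float('inf')
    for alpha in np.linspace(alpha_from, alpha_to, num_alphas):
        for beta in np.linspace(beta_from, beta_to, num_betas):
            err = evaluate(alpha, beta)   # e.g. WER of beam search decoding on the tuning set
            if err < best_err:
                best_alpha, best_beta, best_err = alpha, beta, err
    return best_alpha, best_beta, best_err

# Usage with a dummy stand-in for the real decoding-and-scoring step:
# print(grid_search(lambda a, b: abs(a - 0.3) + abs(b - 0.2)))
```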
--- README.md | 64 +++++++++++++++---------------- deploy/demo_server.py | 2 +- examples/librispeech/run_train.sh | 1 + examples/tiny/run_train.sh | 1 + infer.py | 4 +- test.py | 4 +- tools/tune.py | 4 +- train.py | 2 +- 8 files changed, 42 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index d9b989342..055bd439e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # DeepSpeech2 on PaddlePaddle -*DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient and scalable implementation, including training, inferencing & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. +*DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient and scalable implementation, including training, inference & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. ## Table of Contents - [Prerequisites](#prerequisites) @@ -19,12 +19,12 @@ - [Questions and Help](#questions-and-help) ## Prerequisites -- Only support Python 2.7 +- Python 2.7 only supported - PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation)) ## Installation -Please install the [prerequisites](#prerequisites) above before moving on. +Please make sure the above [prerequisites](#prerequisites) have been satisfied before moving on. ```bash git clone https://github.com/PaddlePaddle/models.git @@ -34,9 +34,9 @@ sh setup.sh ## Getting Started -Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference and model evaluation, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](https://github.com/kaldi-asr/kaldi/tree/master/egs/aishell)). Reading these examples will also help us understand how to make it work with our own data. +Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference and model evaluation, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](http://www.openslr.org/33)). Reading these examples will also help you to understand how to make it work with your own data. -Some of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICE` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False to use CPUs instead. +Some of the scripts in `./examples` are configured with 8 GPUs. 
If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICES` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False to use CPUs instead. Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org/12/) for instance. @@ -46,28 +46,28 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org cd examples/tiny ``` - Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If we would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead. + Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If you would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead. - Prepare the data ```bash sh run_data.sh ``` - `run_data.sh` will download dataset, generate manifests, collect normalizer' statistics and build vocabulary. Once the data preparation is done, we will find the data (only part of LibriSpeech) downloaded in `~/.cache/paddle/dataset/speech/libri` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time we run this dataset and is reusable for all further experiments. + `run_data.sh` will download dataset, generate manifests, collect normalizer's statistics and build vocabulary. Once the data preparation is done, you will find the data (only part of LibriSpeech) downloaded in `~/.cache/paddle/dataset/speech/libri` and the corresponding manifest files generated in `./data/tiny` as well as a mean stddev file and a vocabulary file. It has to be run for the very first time you run this dataset and is reusable for all further experiments. - Train your own ASR model ```bash sh run_train.sh ``` - `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. We can resume the training from these checkpoints, or use them for inference, evaluation and deployment. + `run_train.sh` will start a training job, with training logs printed to stdout and model checkpoint of every pass/epoch saved to `./checkpoints/tiny`. These checkpoints could be used for training resuming, inference, evaluation and deployment. - Case inference with an existing model ```bash sh run_infer.sh ``` - `run_infer.sh` will show us some speech-to-text decoding results for several (default: 10) samples with the trained model. The performance might not be good now as the current model is only trained with a toy subset of LibriSpeech. To see the results with a better model, we can download a well-trained (trained for several days, with the complete LibriSpeech) model and do the inference: + `run_infer.sh` will show us some speech-to-text decoding results for several (default: 10) samples with the trained model. The performance might not be good now as the current model is only trained with a toy subset of LibriSpeech. To see the results with a better model, you can download a well-trained (trained for several days, with the complete LibriSpeech) model and do the inference: ```bash sh run_infer_golden.sh @@ -78,7 +78,7 @@ Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org sh run_test.sh ``` - `run_test.sh` will evaluate the model with Word Error Rate (or Character Error Rate) measurement. 
Similarly, we can also download a well-trained model and test its performance: + `run_test.sh` will evaluate the model with Word Error Rate (or Character Error Rate) measurement. Similarly, you can also download a well-trained model and test its performance: ```bash sh run_test_golden.sh @@ -100,7 +100,7 @@ More detailed information are provided in the following sections. Wish you a hap To use your custom data, you only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels. -For how to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which download and generate manifests for LibriSpeech dataset. +For how to generate such manifest files, please refer to `data/librispeech/librispeech.py`, which will download data and generate manifest files for LibriSpeech dataset. ### Compute Mean & Stddev for Normalizer @@ -142,7 +142,7 @@ python tools/build_vocab.py --help ## Training a model -`train.py` is the main caller of the training module. We show several examples of usage below. +`train.py` is the main caller of the training module. Examples of usage are shown below. - Start training from scratch with 8 GPUs: @@ -172,9 +172,9 @@ or refer to `example/librispeech/run_train.sh`. ## Data Augmentation Pipeline -Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perturbation (label-invariant transformation) added upon raw audios. We don't have to do the syntheses by ourselves, as it is already embedded into the data provider and is done on the fly, randomly for each epoch during training. +Data augmentation has often been a highly effective technique to boost the deep learning performance. We augment our speech data by synthesizing new audios with small random perturbation (label-invariant transformation) added upon raw audios. You don't have to do the syntheses on your own, as it is already embedded into the data provider and is done on the fly, randomly for each epoch during training. -Six optional augmentation components are provided for us to configured and inserted into the processing pipeline. +Six optional augmentation components are provided to be selected, configured and inserted into the processing pipeline. - Volume Perturbation - Speed Perturbation @@ -183,7 +183,7 @@ Six optional augmentation components are provided for us to configured and inser - Noise Perturbation (need background noise audio files) - Impulse Response (need impulse audio files) -In order to inform the trainer of what augmentation components we need and what their processing orders are, we are required to prepare a *augmentation configuration file* in [JSON](http://www.json.org/) format. For example: +In order to inform the trainer of what augmentation components are needed and what their processing orders are, it is required to prepare in advance a *augmentation configuration file* in [JSON](http://www.json.org/) format. For example: ``` [{ @@ -204,13 +204,13 @@ When the `--augment_conf_file` argument of `trainer.py` is set to the path of th For other configuration examples, please refer to `conf/augmenatation.config.example`. 
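Such a configuration file can also be written programmatically before launching a training job. The snippet below is only a sketch: the `type`/`params`/`prob` field names follow the schema assumed for the augmentation pipeline, the component names and parameter values are illustrative, and `--augment_conf_path` is the flag registered in `train.py`.

```python
import json

# Hypothetical two-stage pipeline: speed perturbation, then time shifting.
# Field names ("type", "params", "prob") are assumed from the augmentation schema.
augmentation_pipeline = [
    {"type": "speed",
     "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05},
     "prob": 0.6},
    {"type": "shift",
     "params": {"min_shift_ms": -5, "max_shift_ms": 5},
     "prob": 0.8},
]

with open("conf/my_augmentation.config", "w") as conf_file:
    json.dump(augmentation_pipeline, conf_file, indent=4)

# Then point the trainer at it, e.g.:
#   python train.py --augment_conf_path conf/my_augmentation.config ...
```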
-Be careful when we are utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap. +Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap. ## Inference and Evaluation ### Prepare Language Model -A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. We can simply run this to download the preprared language models: +A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. Users can simply run this to download the preprared language models: ```bash cd models/lm @@ -223,7 +223,7 @@ TODO: any other requirements or tips to add? ### Speech-to-text Inference -An inference module caller `infer.py` is provided for us to infer, decode and visualize speech-to-text results for several given audio clips. It might help to have an intuitive and qualitative evaluation of the ASR model's performance. +An inference module caller `infer.py` is provided to infer, decode and visualize speech-to-text results for several given audio clips. It might help to have an intuitive and qualitative evaluation of the ASR model's performance. - Inference with GPU: @@ -248,7 +248,7 @@ or refer to `example/librispeech/run_infer.sh`. ### Evaluate a Model -To evaluate a model's performance quantitatively, we can run: +To evaluate a model's performance quantitatively, please run: - Evaluation with GPUs: @@ -275,7 +275,7 @@ or refer to `example/librispeech/run_test.sh`. The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It would be better to re-tune them on a validation set when the acoustic model is renewed. -`tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. We must provide the range of $\alpha$ and $\beta$, as well as the number of their attempts. +`tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. You must provide the range of $\alpha$ and $\beta$, as well as the number of their attempts. - Tuning with GPU: @@ -297,7 +297,7 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta python tools/tune.py --use_gpu False ``` -After tuning, we can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance. +After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance. ```bash python tune.py --help @@ -308,9 +308,9 @@ TODO: add figure. ## Distributed Cloud Training -We provide a cloud training module for users to do the distributed cluster training on [PaddleCloud](https://github.com/PaddlePaddle/cloud), to achieve a much faster training speed with multiple machines. To start with this, please first install PaddleCloud client and register a PaddleCloud account, as described in [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud). 
+We also provide a cloud training module for users to do the distributed cluster training on [PaddleCloud](https://github.com/PaddlePaddle/cloud), to achieve a much faster training speed with multiple machines. To start with this, please first install PaddleCloud client and register a PaddleCloud account, as described in [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud). -Then, we take the following steps to submit a training job: +Please take the following steps to submit a training job: - Go to directory: @@ -332,7 +332,7 @@ Then, we take the following steps to submit a training job: - Upload these tar files to PaddleCloud filesystem. - Create cloud manifests by replacing local filesystem paths with PaddleCloud filesystem paths. New manifests will be used to inform the cloud jobs of audio files' location and their meta information. - It should be done only once for the very first time we do the cloud training. Later, the data is kept persisitent on the cloud filesystem and reusable for further job submissions. + It should be done only once for the very first time to do the cloud training. Later, the data is kept persisitent on the cloud filesystem and reusable for further job submissions. For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). @@ -349,7 +349,7 @@ Then, we take the following steps to submit a training job: ```bash sh pcloud_submit.sh ``` - we submit a training job to PaddleCloud. And the job name will be printed when the submission is finished. Now our training job is running well on the PaddleCloud. + a training job has been submitted to PaddleCloud, with the job name printed to the console. - Get training logs @@ -375,9 +375,9 @@ TODO: to be added ## Trying Live Demo with Your Own Voice -Until now, we have trained and tested our ASR model qualitatively (`infer.py`) and quantitatively (`test.py`) with existing audio files. But we have not yet try the model with our own speech. `deploy/demo_server.py` and `deploy/demo_client.py` helps quickly build up a real-time demo ASR engine with the trained model, enabling us to test and play around with the demo, with our own voice. +Until now, an ASR model is trained and tested qualitatively (`infer.py`) and quantitatively (`test.py`) with existing audio files. But it is not yet tested with your own speech. `deploy/demo_server.py` and `deploy/demo_client.py` helps quickly build up a real-time demo ASR engine with the trained model, enabling you to test and play around with the demo, with your own voice. -We start the demo's server in one console by: +To start the demo's server, please run this in one console: ```bash CUDA_VISIBLE_DEVICES=0 \ @@ -387,7 +387,7 @@ python deploy/demo_server.py \ --host_port 8086 ``` -For the machine (might not be the same machine) to run the demo's client, we have to do the following installation before moving on. +For the machine (might not be the same machine) to run the demo's client, please do the following installation before moving on. 
For example, on MAC OS X: @@ -397,7 +397,7 @@ pip install pyaudio pip install pynput ``` -Then we can start the client in another console by: +Then to start the client, please run this in another console: ```bash CUDA_VISIBLE_DEVICES=0 \ @@ -406,11 +406,11 @@ python -u deploy/demo_client.py \ --host_port 8086 ``` -Now, in the client console, press the `whitespace` key, hold, and start speaking. Until we finish our utterance, we release the key to let the speech-to-text results shown in the console. To quit the client, just press `ESC` key. +Now, in the client console, press the `whitespace` key, hold, and start speaking. Until finishing your utterance, release the key to let the speech-to-text results shown in the console. To quit the client, just press `ESC` key. -Notice that `deploy/demo_client.py` must be run in a machine with a microphone device, while `deploy/demo_server.py` could be run in one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessible IP address and port, if the server and client are running with two separate machines. Nothing should be done if they are running in one single machine. +Notice that `deploy/demo_client.py` must be run on a machine with a microphone device, while `deploy/demo_server.py` could be run on one without any audio recording hardware, e.g. any remote server machine. Just be careful to set the `host_ip` and `host_port` argument with the actual accessible IP address and port, if the server and client are running with two separate machines. Nothing should be done if they are running on one single machine. -We can also refer to `examples/mandarin/run_demo_server.sh` for example, which will first download a pre-trained Mandarin model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/mandarin/run_demo_client.sh`, we can speak Mandarin to test it. If we would like to try some other models, just update `--model_path` argument in the script.   +Please also refer to `examples/mandarin/run_demo_server.sh`, which will first download a pre-trained Mandarin model (trained with 3000 hours of internal speech data) and then start the demo server with the model. With running `examples/mandarin/run_demo_client.sh`, you can speak Mandarin to test it. If you would like to try some other models, just update `--model_path` argument in the script.   For more help on arguments: diff --git a/deploy/demo_server.py b/deploy/demo_server.py index 2d3931f74..a7157001c 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -46,7 +46,7 @@ add_arg('vocab_path', str, 'data/librispeech/eng_vocab.txt', "Filepath of vocabulary.") add_arg('model_path', str, - './checkpoints/params.latest.tar.gz', + './checkpoints/libri/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") add_arg('lang_model_path', str, diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 5485475e9..07575dde1 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -3,6 +3,7 @@ pushd ../.. 
> /dev/null # train model +# if you wish to resume from an exists model, uncomment --init_model_path CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u train.py \ --batch_size=512 \ diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index c66ec4e56..74d82712e 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -3,6 +3,7 @@ pushd ../.. > /dev/null # train model +# if you wish to resume from an exists model, uncomment --init_model_path CUDA_VISIBLE_DEVICES=0,1,2,3 \ python -u train.py \ --batch_size=16 \ diff --git a/infer.py b/infer.py index 73e200b49..d9c4c6776 100644 --- a/infer.py +++ b/infer.py @@ -38,10 +38,10 @@ add_arg('vocab_path', str, 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('lang_model_path', str, - 'model_zoo/lm/common_crawl_00.prune01111.trie.klm', + 'models/lm/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") add_arg('model_path', str, - './checkpoints/params.latest.tar.gz', + './checkpoints/libri/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") add_arg('decoding_method', str, diff --git a/test.py b/test.py index 791bfd585..18089f332 100644 --- a/test.py +++ b/test.py @@ -39,11 +39,11 @@ add_arg('vocab_path', str, 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('model_path', str, - './checkpoints/params.latest.tar.gz', + './checkpoints/libri/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") add_arg('lang_model_path', str, - 'model_zoo/lm/common_crawl_00.prune01111.trie.klm', + 'models/lm/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") add_arg('decoding_method', str, 'ctc_beam_search', diff --git a/tools/tune.py b/tools/tune.py index 25e495f19..96c25a3eb 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -44,10 +44,10 @@ add_arg('vocab_path', str, 'data/librispeech/vocab.txt', "Filepath of vocabulary.") add_arg('lang_model_path', str, - 'model_zoo/lm/common_crawl_00.prune01111.trie.klm', + 'models/lm/common_crawl_00.prune01111.trie.klm', "Filepath for language model.") add_arg('model_path', str, - './checkpoints/params.latest.tar.gz', + './checkpoints/libri/params.latest.tar.gz', "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") add_arg('error_rate_type', str, diff --git a/train.py b/train.py index bbf1cd729..406484a18 100644 --- a/train.py +++ b/train.py @@ -48,7 +48,7 @@ add_arg('init_model_path', str, "If None, the training starts from scratch, " "otherwise, it resumes from the pre-trained model.") add_arg('output_model_dir', str, - "./checkpoints", + "./checkpoints/libri", "Directory for saving checkpoints.") add_arg('augment_conf_path',str, 'conf/augmentation.config', From 351f61e36664dd78b3100445c0c22151bf25129b Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 13 Sep 2017 17:34:59 +0800 Subject: [PATCH 188/335] Update RAEDME.md and librispeech.py by following Yaming's review. --- README.md | 2 +- data/librispeech/librispeech.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 055bd439e..9d9d4c77e 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ sh setup.sh Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference and model evaluation, with a few public dataset (e.g. 
[LibriSpeech](http://www.openslr.org/12/), [Aishell](http://www.openslr.org/33)). Reading these examples will also help you to understand how to make it work with your own data. -Some of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICES` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False to use CPUs instead. +Some of the scripts in `./examples` are configured with 8 GPUs. If you don't have 8 GPUs available, please modify `CUDA_VISIBLE_DEVICES` and `--trainer_count`. If you don't have any GPU available, please set `--use_gpu` to False to use CPUs instead. Besides, if out-of-memory problem occurs, just reduce `--batch_size` to fit. Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org/12/) for instance. diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index 0709136e2..8dce359a4 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -64,7 +64,7 @@ def download(url, md5sum, target_dir): filepath = os.path.join(target_dir, url.split("/")[-1]) if not (os.path.exists(filepath) and md5file(filepath) == md5sum): print("Downloading %s ..." % url) - ret = os.system("wget -c " + url + " -P " + target_dir) + os.system("wget -c " + url + " -P " + target_dir) print("\nMD5 Chesksum %s ..." % filepath) if not md5file(filepath) == md5sum: raise RuntimeError("MD5 checksum failed.") From 42efa720cbc68dbf608cdbe4dda88f2314bc9275 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 13 Sep 2017 23:08:30 +0800 Subject: [PATCH 189/335] add __init__.py in decoders/swig --- decoders/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 decoders/__init__.py diff --git a/decoders/__init__.py b/decoders/__init__.py new file mode 100644 index 000000000..e69de29bb From e0ab51f46ee291075734d0267520ffe68d3e224e Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 14 Sep 2017 11:46:59 +0800 Subject: [PATCH 190/335] move deprecated decoders --- model_utils/decoder.py => decoders/decoder_deprecated.py | 0 model_utils/lm_scorer.py => decoders/lm_scorer_deprecated.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename model_utils/decoder.py => decoders/decoder_deprecated.py (100%) rename model_utils/lm_scorer.py => decoders/lm_scorer_deprecated.py (100%) diff --git a/model_utils/decoder.py b/decoders/decoder_deprecated.py similarity index 100% rename from model_utils/decoder.py rename to decoders/decoder_deprecated.py diff --git a/model_utils/lm_scorer.py b/decoders/lm_scorer_deprecated.py similarity index 100% rename from model_utils/lm_scorer.py rename to decoders/lm_scorer_deprecated.py From cd635cf6f3e15dab92ddd44d9a111d2a8d596f28 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 15 Sep 2017 19:08:50 +0800 Subject: [PATCH 191/335] Release librispeech model url. 
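Besides publishing the model URL, this patch reworks the shared `download()` helper in `utils/utility.sh`: the file is now saved directly to `$TARGET` via `wget -O`, and the MD5 comparison is negated so that only a mismatching checksum is treated as a failure. For reference, a rough Python equivalent of the intended download-and-verify flow is sketched below; it is purely illustrative (the real helper stays in shell, and `urllib2`/`hashlib` are not what the repository uses for this purpose).

```python
import hashlib
import urllib2  # Python 2.7, matching the project's prerequisite


def download_and_verify(url, md5sum, target):
    """Download url to target; fail only if the MD5 checksum mismatches."""
    with open(target, "wb") as out_file:
        out_file.write(urllib2.urlopen(url).read())
    md5 = hashlib.md5()
    with open(target, "rb") as in_file:
        for chunk in iter(lambda: in_file.read(1 << 20), b""):
            md5.update(chunk)
    if md5.hexdigest() != md5sum:
        raise RuntimeError("MD5 checksum failed for %s" % target)
```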
--- examples/librispeech/run_infer_golden.sh | 2 +- examples/librispeech/run_test_golden.sh | 2 +- models/librispeech/download_model.sh | 4 ++-- utils/utility.sh | 5 ++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh index 32e9d8623..679bd1bf8 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/librispeech/run_infer_golden.sh @@ -36,7 +36,7 @@ python -u infer.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---infer_manifest='data/tiny/manifest.test-clean' \ +--infer_manifest='data/librispeech/manifest.test-clean' \ --mean_std_path='models/librispeech/mean_std.npz' \ --vocab_path='models/librispeech/vocab.txt' \ --model_path='models/librispeech/params.tar.gz' \ diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index 080c3c062..a505cdc79 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -37,7 +37,7 @@ python -u test.py \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/tiny/manifest.test-clean' \ +--test_manifest='data/librispeech/manifest.test-clean' \ --mean_std_path='models/librispeech/mean_std.npz' \ --vocab_path='models/librispeech/vocab.txt' \ --model_path='models/librispeech/params.tar.gz' \ diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 4408f6c1c..26cccdfd5 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -3,8 +3,8 @@ source ../../utils/utility.sh # TODO: add urls -URL='to-be-added' -MD5=5b4af224b26c1dc4dd972b7d32f2f52a +URL='http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae' +MD5=ea5024a457a91179472f6dfee60e053d TARGET=./librispeech_model.tar.gz diff --git a/utils/utility.sh b/utils/utility.sh index 4f617bfa9..f242b7640 100644 --- a/utils/utility.sh +++ b/utils/utility.sh @@ -11,10 +11,9 @@ download() { fi fi - wget -c $URL -P `dirname "$TARGET"` + wget -c $URL -O "$TARGET" md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'` - if [ $MD5 == $md5_result ]; then - echo "Fail to download the language model!" + if [ ! $MD5 == $md5_result ]; then return 1 fi } From fb75f159a4b1e67a1103db26db8daf76e38559a4 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 15 Sep 2017 19:50:59 +0800 Subject: [PATCH 192/335] Publish urls for aishell model and chinese language model. --- models/aishell/download_model.sh | 19 +++++++++++++++++++ models/librispeech/download_model.sh | 1 - models/lm/download_lm_ch.sh | 18 ++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 models/aishell/download_model.sh create mode 100644 models/lm/download_lm_ch.sh diff --git a/models/aishell/download_model.sh b/models/aishell/download_model.sh new file mode 100644 index 000000000..4368ee55a --- /dev/null +++ b/models/aishell/download_model.sh @@ -0,0 +1,19 @@ +#! /usr/bin/bash + +source ../../utils/utility.sh + +URL='http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274' +MD5=28521a58552885a81cf92a1e9b133a71 +TARGET=./aishell_model.tar.gz + + +echo "Download Aishell model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download Aishell model!" 
+ exit 1 +fi +tar -zxvf $TARGET + + +exit 0 diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 26cccdfd5..b5fcd7d8c 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -2,7 +2,6 @@ source ../../utils/utility.sh -# TODO: add urls URL='http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae' MD5=ea5024a457a91179472f6dfee60e053d TARGET=./librispeech_model.tar.gz diff --git a/models/lm/download_lm_ch.sh b/models/lm/download_lm_ch.sh new file mode 100644 index 000000000..7f1c47a27 --- /dev/null +++ b/models/lm/download_lm_ch.sh @@ -0,0 +1,18 @@ +#! /usr/bin/bash + +source ../../utils/utility.sh + +URL=http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e +MD5="29e02312deb2e59b3c8686c7966d4fe3" +TARGET=./zh_giga.no_cna_cmn.prune01244.klm + + +echo "Download language model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download the language model!" + exit 1 +fi + + +exit 0 From a18e6a7eda2a936c567feae67bbab7bd732c8d17 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 15 Sep 2017 22:30:40 +0800 Subject: [PATCH 193/335] refine by following review comments --- README.md | 13 -- data_utils/featurizer/text_featurizer.py | 2 + decoders/swig/ctc_decoders.cpp | 156 +++++++++++------------ decoders/swig/ctc_decoders.h | 24 ++-- decoders/swig/decoder_utils.h | 16 +++ decoders/swig_wrapper.py | 16 +-- examples/librispeech/run_test_golden.sh | 8 +- infer.py | 9 +- model_utils/model.py | 1 - setup.sh | 9 ++ test.py | 9 +- utils/utility.sh | 2 +- 12 files changed, 129 insertions(+), 136 deletions(-) diff --git a/README.md b/README.md index db940639a..758799716 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,6 @@ ## Installation -### Basic setup - Please make sure the above [prerequisites](#prerequisites) have been satisfied before moving on. ```bash @@ -34,16 +32,6 @@ cd models/deep_speech_2 sh setup.sh ``` -### Decoders setup - -```bash -cd decoders/swig -sh setup.sh -cd ../.. -``` - -These commands will install the decoders that translate the ouptut probability vectors of DS2 model to text data, incuding CTC greedy decoder, CTC beam search decoder and its batch version. And a detailed usuage about them will be given in the following sections. - ## Getting Started Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference and model evaluation, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](http://www.openslr.org/33)). Reading these examples will also help you to understand how to make it work with your own data. @@ -189,7 +177,6 @@ Data augmentation has often been a highly effective technique to boost the deep Six optional augmentation components are provided to be selected, configured and inserted into the processing pipeline. 
### Inference - - Volume Perturbation - Speed Perturbation - Shifting Perturbation diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py index 89202163c..95dc637e0 100644 --- a/data_utils/featurizer/text_featurizer.py +++ b/data_utils/featurizer/text_featurizer.py @@ -22,6 +22,8 @@ class TextFeaturizer(object): def __init__(self, vocab_filepath): self._vocab_dict, self._vocab_list = self._load_vocabulary_from_file( vocab_filepath) + # from unicode to string + self._vocab_list = [chars.encode("utf-8") for chars in self._vocab_list] def featurize(self, text): """Convert text string to a list of token indices in char-level.Note diff --git a/decoders/swig/ctc_decoders.cpp b/decoders/swig/ctc_decoders.cpp index b52394b6e..e86bfe0f2 100644 --- a/decoders/swig/ctc_decoders.cpp +++ b/decoders/swig/ctc_decoders.cpp @@ -17,41 +17,38 @@ std::string ctc_greedy_decoder( const std::vector> &probs_seq, const std::vector &vocabulary) { // dimension check - int num_time_steps = probs_seq.size(); - for (int i = 0; i < num_time_steps; i++) { - if (probs_seq[i].size() != vocabulary.size() + 1) { - std::cout << "The shape of probs_seq does not match" - << " with the shape of the vocabulary!" << std::endl; - exit(1); - } + size_t num_time_steps = probs_seq.size(); + for (size_t i = 0; i < num_time_steps; i++) { + VALID_CHECK_EQ(probs_seq[i].size(), + vocabulary.size() + 1, + "The shape of probs_seq does not match with " + "the shape of the vocabulary"); } - int blank_id = vocabulary.size(); + size_t blank_id = vocabulary.size(); - std::vector max_idx_vec; - double max_prob = 0.0; - int max_idx = 0; - for (int i = 0; i < num_time_steps; i++) { - for (int j = 0; j < probs_seq[i].size(); j++) { + std::vector max_idx_vec; + for (size_t i = 0; i < num_time_steps; i++) { + double max_prob = 0.0; + size_t max_idx = 0; + for (size_t j = 0; j < probs_seq[i].size(); j++) { if (max_prob < probs_seq[i][j]) { max_idx = j; max_prob = probs_seq[i][j]; } } max_idx_vec.push_back(max_idx); - max_prob = 0.0; - max_idx = 0; } - std::vector idx_vec; - for (int i = 0; i < max_idx_vec.size(); i++) { + std::vector idx_vec; + for (size_t i = 0; i < max_idx_vec.size(); i++) { if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { idx_vec.push_back(max_idx_vec[i]); } } std::string best_path_result; - for (int i = 0; i < idx_vec.size(); i++) { + for (size_t i = 0; i < idx_vec.size(); i++) { if (idx_vec[i] != blank_id) { best_path_result += vocabulary[idx_vec[i]]; } @@ -61,29 +58,24 @@ std::string ctc_greedy_decoder( std::vector> ctc_beam_search_decoder( const std::vector> &probs_seq, - int beam_size, + const size_t beam_size, std::vector vocabulary, - int blank_id, - double cutoff_prob, - int cutoff_top_n, - Scorer *extscorer) { + const double cutoff_prob, + const size_t cutoff_top_n, + Scorer *ext_scorer) { // dimension check size_t num_time_steps = probs_seq.size(); - for (int i = 0; i < num_time_steps; i++) { - if (probs_seq[i].size() != vocabulary.size() + 1) { - std::cout << " The shape of probs_seq does not match" - << " with the shape of the vocabulary!" << std::endl; - exit(1); - } + for (size_t i = 0; i < num_time_steps; i++) { + VALID_CHECK_EQ(probs_seq[i].size(), + vocabulary.size() + 1, + "The shape of probs_seq does not match with " + "the shape of the vocabulary"); } - // blank_id check - if (blank_id > vocabulary.size()) { - std::cout << " Invalid blank_id! 
" << std::endl; - exit(1); - } + // assign blank id + size_t blank_id = vocabulary.size(); - // assign space ID + // assign space id std::vector::iterator it = std::find(vocabulary.begin(), vocabulary.end(), " "); int space_id = it - vocabulary.begin(); @@ -98,16 +90,16 @@ std::vector> ctc_beam_search_decoder( std::vector prefixes; prefixes.push_back(&root); - if (extscorer != nullptr) { - if (extscorer->is_char_map_empty()) { - extscorer->set_char_map(vocabulary); + if (ext_scorer != nullptr) { + if (ext_scorer->is_char_map_empty()) { + ext_scorer->set_char_map(vocabulary); } - if (!extscorer->is_character_based()) { - if (extscorer->dictionary == nullptr) { + if (!ext_scorer->is_character_based()) { + if (ext_scorer->dictionary == nullptr) { // fill dictionary for fst with space - extscorer->fill_dictionary(true); + ext_scorer->fill_dictionary(true); } - auto fst_dict = static_cast(extscorer->dictionary); + auto fst_dict = static_cast(ext_scorer->dictionary); fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); root.set_dictionary(dict_ptr); auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); @@ -116,33 +108,33 @@ std::vector> ctc_beam_search_decoder( } // prefix search over time - for (int time_step = 0; time_step < num_time_steps; time_step++) { + for (size_t time_step = 0; time_step < num_time_steps; time_step++) { std::vector prob = probs_seq[time_step]; std::vector> prob_idx; - for (int i = 0; i < prob.size(); i++) { + for (size_t i = 0; i < prob.size(); i++) { prob_idx.push_back(std::pair(i, prob[i])); } float min_cutoff = -NUM_FLT_INF; bool full_beam = false; - if (extscorer != nullptr) { - int num_prefixes = std::min((int)prefixes.size(), beam_size); + if (ext_scorer != nullptr) { + size_t num_prefixes = std::min(prefixes.size(), beam_size); std::sort( prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); min_cutoff = prefixes[num_prefixes - 1]->score + log(prob[blank_id]) - - std::max(0.0, extscorer->beta); + std::max(0.0, ext_scorer->beta); full_beam = (num_prefixes == beam_size); } // pruning of vacobulary - int cutoff_len = prob.size(); + size_t cutoff_len = prob.size(); if (cutoff_prob < 1.0 || cutoff_top_n < prob.size()) { std::sort( prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); if (cutoff_prob < 1.0) { double cum_prob = 0.0; cutoff_len = 0; - for (int i = 0; i < prob_idx.size(); i++) { + for (size_t i = 0; i < prob_idx.size(); i++) { cum_prob += prob_idx[i].second; cutoff_len += 1; if (cum_prob >= cutoff_prob) break; @@ -152,18 +144,18 @@ std::vector> ctc_beam_search_decoder( prob_idx = std::vector>( prob_idx.begin(), prob_idx.begin() + cutoff_len); } - std::vector> log_prob_idx; - for (int i = 0; i < cutoff_len; i++) { + std::vector> log_prob_idx; + for (size_t i = 0; i < cutoff_len; i++) { log_prob_idx.push_back(std::pair( prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); } // loop over chars - for (int index = 0; index < log_prob_idx.size(); index++) { + for (size_t index = 0; index < log_prob_idx.size(); index++) { auto c = log_prob_idx[index].first; float log_prob_c = log_prob_idx[index].second; - for (int i = 0; i < prefixes.size() && i < beam_size; i++) { + for (size_t i = 0; i < prefixes.size() && i < beam_size; i++) { auto prefix = prefixes[i]; if (full_beam && log_prob_c + prefix->score < min_cutoff) { @@ -194,12 +186,12 @@ std::vector> ctc_beam_search_decoder( } // language model scoring - if (extscorer != nullptr && - (c == space_id || extscorer->is_character_based())) { + if (ext_scorer != nullptr && + (c == space_id 
|| ext_scorer->is_character_based())) { PathTrie *prefix_toscore = nullptr; // skip scoring the space - if (extscorer->is_character_based()) { + if (ext_scorer->is_character_based()) { prefix_toscore = prefix_new; } else { prefix_toscore = prefix; @@ -207,11 +199,11 @@ std::vector> ctc_beam_search_decoder( double score = 0.0; std::vector ngram; - ngram = extscorer->make_ngram(prefix_toscore); - score = extscorer->get_log_cond_prob(ngram) * extscorer->alpha; + ngram = ext_scorer->make_ngram(prefix_toscore); + score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; log_p += score; - log_p += extscorer->beta; + log_p += ext_scorer->beta; } prefix_new->log_prob_nb_cur = log_sum_exp(prefix_new->log_prob_nb_cur, log_p); @@ -240,15 +232,15 @@ std::vector> ctc_beam_search_decoder( for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { double approx_ctc = prefixes[i]->score; - if (extscorer != nullptr) { + if (ext_scorer != nullptr) { std::vector output; prefixes[i]->get_path_vec(output); size_t prefix_length = output.size(); - auto words = extscorer->split_labels(output); + auto words = ext_scorer->split_labels(output); // remove word insert - approx_ctc = approx_ctc - prefix_length * extscorer->beta; + approx_ctc = approx_ctc - prefix_length * ext_scorer->beta; // remove language model weight: - approx_ctc -= (extscorer->get_sent_log_prob(words)) * extscorer->alpha; + approx_ctc -= (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha; } prefixes[i]->approx_ctc = approx_ctc; @@ -269,7 +261,7 @@ std::vector> ctc_beam_search_decoder( space_prefixes[i]->get_path_vec(output); // convert index to string std::string output_str; - for (int j = 0; j < output.size(); j++) { + for (size_t j = 0; j < output.size(); j++) { output_str += vocabulary[output[j]]; } std::pair output_pair(-space_prefixes[i]->approx_ctc, @@ -283,49 +275,45 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( const std::vector>> &probs_split, - int beam_size, + const size_t beam_size, const std::vector &vocabulary, - int blank_id, - int num_processes, - double cutoff_prob, - int cutoff_top_n, - Scorer *extscorer) { - if (num_processes <= 0) { - std::cout << "num_processes must be nonnegative!" 
<< std::endl; - exit(1); - } + const size_t num_processes, + const double cutoff_prob, + const size_t cutoff_top_n, + Scorer *ext_scorer) { + VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); // thread pool ThreadPool pool(num_processes); // number of samples - int batch_size = probs_split.size(); + size_t batch_size = probs_split.size(); // scorer filling up - if (extscorer != nullptr) { - if (extscorer->is_char_map_empty()) { - extscorer->set_char_map(vocabulary); + if (ext_scorer != nullptr) { + if (ext_scorer->is_char_map_empty()) { + ext_scorer->set_char_map(vocabulary); } - if (!extscorer->is_character_based() && extscorer->dictionary == nullptr) { + if (!ext_scorer->is_character_based() && + ext_scorer->dictionary == nullptr) { // init dictionary - extscorer->fill_dictionary(true); + ext_scorer->fill_dictionary(true); } } // enqueue the tasks of decoding std::vector>>> res; - for (int i = 0; i < batch_size; i++) { + for (size_t i = 0; i < batch_size; i++) { res.emplace_back(pool.enqueue(ctc_beam_search_decoder, probs_split[i], beam_size, vocabulary, - blank_id, cutoff_prob, cutoff_top_n, - extscorer)); + ext_scorer)); } // get decoding results std::vector>> batch_results; - for (int i = 0; i < batch_size; i++) { + for (size_t i = 0; i < batch_size; i++) { batch_results.emplace_back(res[i].get()); } return batch_results; diff --git a/decoders/swig/ctc_decoders.h b/decoders/swig/ctc_decoders.h index b8c512bda..6384c8a8f 100644 --- a/decoders/swig/ctc_decoders.h +++ b/decoders/swig/ctc_decoders.h @@ -27,21 +27,21 @@ std::string ctc_greedy_decoder( * over vocabulary of one time step. * beam_size: The width of beam search. * vocabulary: A vector of vocabulary. - * blank_id: ID of blank. * cutoff_prob: Cutoff probability for pruning. * cutoff_top_n: Cutoff number for pruning. - * ext_scorer: External scorer to evaluate a prefix. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. * Return: * A vector that each element is a pair of score and decoding result, * in desending order. */ std::vector> ctc_beam_search_decoder( const std::vector> &probs_seq, - int beam_size, + const size_t beam_size, std::vector vocabulary, - int blank_id, - double cutoff_prob = 1.0, - int cutoff_top_n = 40, + const double cutoff_prob = 1.0, + const size_t cutoff_top_n = 40, Scorer *ext_scorer = NULL); /* CTC Beam Search Decoder for batch data @@ -52,11 +52,12 @@ std::vector> ctc_beam_search_decoder( * . * beam_size: The width of beam search. * vocabulary: A vector of vocabulary. - * blank_id: ID of blank. * num_processes: Number of threads for beam search. * cutoff_prob: Cutoff probability for pruning. * cutoff_top_n: Cutoff number for pruning. - * ext_scorer: External scorer to evaluate a prefix. + * ext_scorer: External scorer to evaluate a prefix, which consists of + * n-gram language model scoring and word insertion term. + * Default null, decoding the input sample without scorer. * Return: * A 2-D vector that each element is a vector of beam search decoding * result for one audio sample. 
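The two pruning arguments documented above interact as follows: at each time step the candidate characters are sorted by probability, accumulated until their cumulative probability reaches `cutoff_prob`, and the surviving set is further capped at `cutoff_top_n` entries. A small standalone Python sketch of that selection rule (it mirrors the C++ decoder and the deprecated Python decoder, and is not code contained in this patch):

```python
def prune_vocabulary(prob_step, cutoff_prob=1.0, cutoff_top_n=40):
    """Return the (index, prob) pairs kept by cutoff_prob / cutoff_top_n pruning."""
    prob_idx = sorted(enumerate(prob_step), key=lambda x: x[1], reverse=True)
    cutoff_len, cum_prob = 0, 0.0
    for _, prob in prob_idx:
        cum_prob += prob
        cutoff_len += 1
        if cum_prob >= cutoff_prob:
            break
    cutoff_len = min(cutoff_len, cutoff_top_n)
    return prob_idx[:cutoff_len]

# Example: prune_vocabulary([0.5, 0.3, 0.1, 0.05, 0.05], cutoff_prob=0.9,
# cutoff_top_n=2) keeps only the two most probable characters.
```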
@@ -64,12 +65,11 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( const std::vector>> &probs_split, - int beam_size, + const size_t beam_size, const std::vector &vocabulary, - int blank_id, - int num_processes, + const size_t num_processes, double cutoff_prob = 1.0, - int cutoff_top_n = 40, + const size_t cutoff_top_n = 40, Scorer *ext_scorer = NULL); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/decoders/swig/decoder_utils.h b/decoders/swig/decoder_utils.h index d4ee36e1b..015646ddd 100644 --- a/decoders/swig/decoder_utils.h +++ b/decoders/swig/decoder_utils.h @@ -7,6 +7,22 @@ const float NUM_FLT_INF = std::numeric_limits::max(); const float NUM_FLT_MIN = std::numeric_limits::min(); +// check if __A == _B +#define VALID_CHECK_EQ(__A, __B, __ERR) \ + if ((__A) != (__B)) { \ + std::ostringstream str; \ + str << (__A) << " != " << (__B) << ", "; \ + throw std::runtime_error(str.str() + __ERR); \ + } + +// check if __A > __B +#define VALID_CHECK_GT(__A, __B, __ERR) \ + if ((__A) <= (__B)) { \ + std::ostringstream str; \ + str << (__A) << " <= " << (__B) << ", "; \ + throw std::runtime_error(str.str() + __ERR); \ + } + // Function template for comparing two pairs template bool pair_comp_first_rev(const std::pair &a, diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py index 202440bfb..54ed249f3 100644 --- a/decoders/swig_wrapper.py +++ b/decoders/swig_wrapper.py @@ -41,7 +41,6 @@ def ctc_greedy_decoder(probs_seq, vocabulary): def ctc_beam_search_decoder(probs_seq, beam_size, vocabulary, - blank_id, cutoff_prob=1.0, cutoff_top_n=40, ext_scoring_func=None): @@ -55,8 +54,6 @@ def ctc_beam_search_decoder(probs_seq, :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param blank_id: ID of blank. - :type blank_id: int :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float @@ -72,15 +69,14 @@ def ctc_beam_search_decoder(probs_seq, results, in descending order of the probability. :rtype: list """ - return swig_decoders.ctc_beam_search_decoder( - probs_seq.tolist(), beam_size, vocabulary, blank_id, cutoff_prob, - cutoff_top_n, ext_scoring_func) + return swig_decoders.ctc_beam_search_decoder(probs_seq.tolist(), beam_size, + vocabulary, cutoff_prob, + cutoff_top_n, ext_scoring_func) def ctc_beam_search_decoder_batch(probs_split, beam_size, vocabulary, - blank_id, num_processes, cutoff_prob=1.0, cutoff_top_n=40, @@ -94,8 +90,6 @@ def ctc_beam_search_decoder_batch(probs_split, :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param blank_id: ID of blank. - :type blank_id: int :param num_processes: Number of parallel processes. 
:type num_processes: int :param cutoff_prob: Cutoff probability in vocabulary pruning, @@ -118,5 +112,5 @@ def ctc_beam_search_decoder_batch(probs_split, probs_split = [probs_seq.tolist() for probs_seq in probs_split] return swig_decoders.ctc_beam_search_decoder_batch( - probs_split, beam_size, vocabulary, blank_id, num_processes, - cutoff_prob, cutoff_top_n, ext_scoring_func) + probs_split, beam_size, vocabulary, num_processes, cutoff_prob, + cutoff_top_n, ext_scoring_func) diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index 080c3c062..e539bd013 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -31,13 +31,13 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ ---test_manifest='data/tiny/manifest.test-clean' \ +--test_manifest='data/librispeech/manifest.test-clean' \ --mean_std_path='models/librispeech/mean_std.npz' \ --vocab_path='models/librispeech/vocab.txt' \ --model_path='models/librispeech/params.tar.gz' \ diff --git a/infer.py b/infer.py index 48c4ef493..5da1db970 100644 --- a/infer.py +++ b/infer.py @@ -21,9 +21,9 @@ add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('alpha', float, 0.36, "Coef of LM for beam search.") -add_arg('beta', float, 0.25, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('alpha', float, 2.15, "Coef of LM for beam search.") +add_arg('beta', float, 0.35, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " @@ -85,7 +85,6 @@ def infer(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) - vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] result_transcripts = ds2_model.infer_batch( infer_data=infer_data, decoding_method=args.decoding_method, @@ -93,7 +92,7 @@ def infer(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=vocab_list, + vocab_list=data_generator.vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) diff --git a/model_utils/model.py b/model_utils/model.py index 5812afca6..1a9910e9d 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -214,7 +214,6 @@ class DeepSpeech2Model(object): probs_split=probs_split, vocabulary=vocab_list, beam_size=beam_size, - blank_id=len(vocab_list), num_processes=num_processes, ext_scoring_func=self._ext_scorer, cutoff_prob=cutoff_prob) diff --git a/setup.sh b/setup.sh index 6c8a70994..dcb3e0fbc 100644 --- a/setup.sh +++ b/setup.sh @@ -26,4 +26,13 @@ if [ $? != 0 ]; then rm libsndfile-1.0.28.tar.gz fi +# install decoders +python -c "import swig_decoders" +if [ $? != 0 ]; then + pushd decoders/swig > /dev/null + sh setup.sh + popd > /dev/null +fi + + echo "Install all dependencies successfully." 
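With `blank_id` removed from the SWIG wrappers above (the blank index is now derived from the vocabulary size inside the decoders), a typical batch decoding call reduces to the following. This is a minimal sketch assuming the `swig_decoders` extension has been built by `setup.sh`; the toy vocabulary and random probability matrices are placeholders for the model's real softmax outputs, and the numeric arguments are example values only.

```python
import numpy as np
from decoders.swig_wrapper import ctc_beam_search_decoder_batch

vocab_list = ["'", " ", "a", "b", "c"]          # toy vocabulary (illustrative)
num_time_steps, num_classes = 6, len(vocab_list) + 1
# Two random utterances standing in for real per-time-step probability matrices.
probs_split = [np.random.dirichlet(np.ones(num_classes), num_time_steps)
               for _ in range(2)]

results = ctc_beam_search_decoder_batch(
    probs_split=probs_split,
    beam_size=20,
    vocabulary=vocab_list,
    num_processes=2,
    cutoff_prob=1.0,
    cutoff_top_n=40,
    ext_scoring_func=None)   # pass a Scorer instance here to enable LM rescoring
# Each element of results is a list of (score, transcript) pairs, best first.
best_transcripts = [beams[0][1] for beams in results]
```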
diff --git a/test.py b/test.py index 499f71f62..76efb4d1e 100644 --- a/test.py +++ b/test.py @@ -22,9 +22,9 @@ add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('alpha', float, 0.36, "Coef of LM for beam search.") -add_arg('beta', float, 0.25, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('alpha', float, 2.15, "Coef of LM for beam search.") +add_arg('beta', float, 0.35, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " @@ -85,7 +85,6 @@ def evaluate(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) - vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): @@ -96,7 +95,7 @@ def evaluate(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=vocab_list, + vocab_list=data_generator.vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) target_transcripts = [ diff --git a/utils/utility.sh b/utils/utility.sh index c8121126a..aa0ec002b 100644 --- a/utils/utility.sh +++ b/utils/utility.sh @@ -13,7 +13,7 @@ download() { wget -c $URL -P `dirname "$TARGET"` md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'` - if [ $MD5 -ne $md5_result ]; then + if [ ! $MD5 == $md5_result ]; then echo "Fail to download the language model!" return 1 fi From 3bb746c61f3440fc9b5a0bd0930370d1a1adbf8c Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 15 Sep 2017 23:06:43 +0800 Subject: [PATCH 194/335] Add last two sections (experiments and model released) to README.md. 
--- README.md | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9d9d4c77e..4080476b4 100644 --- a/README.md +++ b/README.md @@ -419,13 +419,56 @@ python deploy/demo_server.py --help python deploy/demo_client.py --help ``` +## Released Models + +#### Speech Model Released + +Language | Model Name | Training Data | Training Hours +:-----------: | :------------: | :----------: | -------: +English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [Internal English Model](to-be-added) | Baidu English Dataset | 8000 h +Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [Internal Mandarin Model](to-be-added) | Baidu Mandarin Dataset | 2917 h + +#### Language Model Released + +Language Model | Training Data | Token-based | Size | Filter Configuraiton +:-------------:| :------------:| :-----: | -----: | -----------------: +[English LM (Median)](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | To Be Added | Word-based | 8.3 GB | To Be Added +[English LM (Big)](to-be-added) | To Be Added | Word-based | X.X GB | To Be Added +[Mandarin LM (Median)](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | To Be Added | Character-based | 2.8 GB | To Be Added +[Mandarin LM (Big)](to-be-added) | To Be Added | Character-based | X.X GB | To Be Added + ## Experiments and Benchmarks -TODO: to be added +#### English Model Evaluation (Word Error Rate) -## Released Models +Test Set | LibriSpeech Model | Internal English Model +:---------------------: | :---------------: | :-------------------: +LibriSpeech-Test-Clean | 7.9 | X.X +LibriSpeech-Test-Other | X.X | X.X +VoxForge-Test | X.X | X.X +Baidu-English-Test | X.X | X.X -TODO: to be added +#### English Model Evaluation (Character Error Rate) + +Test Set | LibriSpeech Model | Internal English Model +:---------------------: | :---------------: | :-------------------: +LibriSpeech-Test-Clean | X.X | X.X +LibriSpeech-Test-Other | X.X | X.X +VoxForge-Test | X.X | X.X +Baidu-English-Test | X.X | X.X + +#### Mandarin Model Evaluation (Character Error Rate) + +Test Set | Aishell Model | Internal Mandarin Model +:---------------------: | :---------------: | :-------------------: +Aishell-Test | X.X | X.X +Baidu-Mandarin-Test | X.X | X.X + +#### Multiple GPU Efficiency + +TODO: To Be Added ## Questions and Help From c3710b7f5242ef4d231413c2a4e50cf9011d2a05 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 15 Sep 2017 23:13:32 +0800 Subject: [PATCH 195/335] Add wget return check. --- utils/utility.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/utility.sh b/utils/utility.sh index f242b7640..baae04743 100644 --- a/utils/utility.sh +++ b/utils/utility.sh @@ -12,6 +12,10 @@ download() { fi wget -c $URL -O "$TARGET" + if [ $? -ne 0 ]; then + return 1 + fi + md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'` if [ ! 
$MD5 == $md5_result ]; then return 1 From 7e093ed1a3f46b2c98b41ee7edeea601bc208a13 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sat, 16 Sep 2017 12:38:58 +0800 Subject: [PATCH 196/335] expose param cutoff_top_n --- data_utils/featurizer/text_featurizer.py | 2 -- decoders/decoder_deprecated.py | 20 ++++++++------------ decoders/lm_scorer_deprecated.py | 2 +- decoders/swig/ctc_decoders.cpp | 2 +- examples/librispeech/run_infer.sh | 1 + examples/librispeech/run_infer_golden.sh | 1 + examples/librispeech/run_test_golden.sh | 1 + infer.py | 9 +++++++-- model_utils/model.py | 11 ++++++++--- test.py | 9 +++++++-- 10 files changed, 35 insertions(+), 23 deletions(-) diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py index 95dc637e0..89202163c 100644 --- a/data_utils/featurizer/text_featurizer.py +++ b/data_utils/featurizer/text_featurizer.py @@ -22,8 +22,6 @@ class TextFeaturizer(object): def __init__(self, vocab_filepath): self._vocab_dict, self._vocab_list = self._load_vocabulary_from_file( vocab_filepath) - # from unicode to string - self._vocab_list = [chars.encode("utf-8") for chars in self._vocab_list] def featurize(self, text): """Convert text string to a list of token indices in char-level.Note diff --git a/decoders/decoder_deprecated.py b/decoders/decoder_deprecated.py index ffba2731a..647431632 100644 --- a/decoders/decoder_deprecated.py +++ b/decoders/decoder_deprecated.py @@ -42,8 +42,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary): def ctc_beam_search_decoder(probs_seq, beam_size, vocabulary, - blank_id, cutoff_prob=1.0, + cutoff_top_n=40, ext_scoring_func=None, nproc=False): """CTC Beam search decoder. @@ -66,8 +66,6 @@ def ctc_beam_search_decoder(probs_seq, :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param blank_id: ID of blank. - :type blank_id: int :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float @@ -87,9 +85,8 @@ def ctc_beam_search_decoder(probs_seq, raise ValueError("The shape of prob_seq does not match with the " "shape of the vocabulary.") - # blank_id check - if not blank_id < len(probs_seq[0]): - raise ValueError("blank_id shouldn't be greater than probs dimension") + # blank_id assign + blank_id = len(vocabulary) # If the decoder called in the multiprocesses, then use the global scorer # instantiated in ctc_beam_search_decoder_batch(). @@ -114,7 +111,7 @@ def ctc_beam_search_decoder(probs_seq, prob_idx = list(enumerate(probs_seq[time_step])) cutoff_len = len(prob_idx) #If pruning is enabled - if cutoff_prob < 1.0: + if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len: prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True) cutoff_len, cum_prob = 0, 0.0 for i in xrange(len(prob_idx)): @@ -122,6 +119,7 @@ def ctc_beam_search_decoder(probs_seq, cutoff_len += 1 if cum_prob >= cutoff_prob: break + cutoff_len = min(cutoff_top_n, cutoff_top_n) prob_idx = prob_idx[0:cutoff_len] for l in prefix_set_prev: @@ -191,9 +189,9 @@ def ctc_beam_search_decoder(probs_seq, def ctc_beam_search_decoder_batch(probs_split, beam_size, vocabulary, - blank_id, num_processes, cutoff_prob=1.0, + cutoff_top_n=40, ext_scoring_func=None): """CTC beam search decoder using multiple processes. @@ -204,8 +202,6 @@ def ctc_beam_search_decoder_batch(probs_split, :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list - :param blank_id: ID of blank. - :type blank_id: int :param num_processes: Number of parallel processes. 
:type num_processes: int :param cutoff_prob: Cutoff probability in pruning, @@ -232,8 +228,8 @@ def ctc_beam_search_decoder_batch(probs_split, pool = multiprocessing.Pool(processes=num_processes) results = [] for i, probs_list in enumerate(probs_split): - args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, None, - nproc) + args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, + cutoff_top_n, None, nproc) results.append(pool.apply_async(ctc_beam_search_decoder, args)) pool.close() diff --git a/decoders/lm_scorer_deprecated.py b/decoders/lm_scorer_deprecated.py index 463e96d66..c6a661030 100644 --- a/decoders/lm_scorer_deprecated.py +++ b/decoders/lm_scorer_deprecated.py @@ -8,7 +8,7 @@ import kenlm import numpy as np -class LmScorer(object): +class Scorer(object): """External scorer to evaluate a prefix or whole sentence in beam search decoding, including the score from n-gram language model and word count. diff --git a/decoders/swig/ctc_decoders.cpp b/decoders/swig/ctc_decoders.cpp index 86598eee6..35425fbca 100644 --- a/decoders/swig/ctc_decoders.cpp +++ b/decoders/swig/ctc_decoders.cpp @@ -128,7 +128,7 @@ std::vector> ctc_beam_search_decoder( // pruning of vacobulary size_t cutoff_len = prob.size(); - if (cutoff_prob < 1.0 || cutoff_top_n < prob.size()) { + if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) { std::sort( prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); if (cutoff_prob < 1.0) { diff --git a/examples/librispeech/run_infer.sh b/examples/librispeech/run_infer.sh index fa177933a..b6f254a0b 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/librispeech/run_infer.sh @@ -24,6 +24,7 @@ python -u infer.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh index 20dfc65ee..9336edebb 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/librispeech/run_infer_golden.sh @@ -33,6 +33,7 @@ python -u infer.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index e539bd013..6aed4cfca 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -34,6 +34,7 @@ python -u test.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/infer.py b/infer.py index 5da1db970..1064fd25a 100644 --- a/infer.py +++ b/infer.py @@ -23,7 +23,8 @@ add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") add_arg('alpha', float, 2.15, "Coef of LM for beam search.") add_arg('beta', float, 0.35, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") +add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " @@ -85,6 +86,9 @@ def infer(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + # decoders only accept string encoded in utf-8 + vocab_list = 
[chars.encode("utf-8") for chars in data_generator.vocab_list] + result_transcripts = ds2_model.infer_batch( infer_data=infer_data, decoding_method=args.decoding_method, @@ -92,7 +96,8 @@ def infer(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) diff --git a/model_utils/model.py b/model_utils/model.py index 1a9910e9d..4f5021a6d 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -148,8 +148,8 @@ class DeepSpeech2Model(object): return self._loss_inferer.infer(input=infer_data) def infer_batch(self, infer_data, decoding_method, beam_alpha, beam_beta, - beam_size, cutoff_prob, vocab_list, language_model_path, - num_processes): + beam_size, cutoff_prob, cutoff_top_n, vocab_list, + language_model_path, num_processes): """Model inference. Infer the transcription for a batch of speech utterances. @@ -169,6 +169,10 @@ class DeepSpeech2Model(object): :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int :param vocab_list: List of tokens in the vocabulary, for decoding. :type vocab_list: list :param language_model_path: Filepath for language model. @@ -216,7 +220,8 @@ class DeepSpeech2Model(object): beam_size=beam_size, num_processes=num_processes, ext_scoring_func=self._ext_scorer, - cutoff_prob=cutoff_prob) + cutoff_prob=cutoff_prob, + cutoff_top_n=cutoff_top_n) results = [result[0][1] for result in beam_search_results] else: diff --git a/test.py b/test.py index 76efb4d1e..c564bb85d 100644 --- a/test.py +++ b/test.py @@ -24,7 +24,8 @@ add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") add_arg('alpha', float, 2.15, "Coef of LM for beam search.") add_arg('beta', float, 0.35, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") +add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " @@ -85,6 +86,9 @@ def evaluate(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + # decoders only accept string encoded in utf-8 + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): @@ -95,7 +99,8 @@ def evaluate(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) target_transcripts = [ From a24d0138d9c300024d040c735df1421d32e36ebb Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 17 Sep 2017 19:05:04 +0800 Subject: [PATCH 197/335] adjust scorer's init & add logging for scorer & separate long functions --- README.md | 1 - ...r_deprecated.py => decoders_deprecated.py} | 6 +- 
...rer_deprecated.py => scorer_deprecated.py} | 0 ...coders.cpp => ctc_beam_search_decoder.cpp} | 164 +++--------------- ...c_decoders.h => ctc_beam_search_decoder.h} | 29 +--- decoders/swig/ctc_greedy_decoder.cpp | 45 +++++ decoders/swig/ctc_greedy_decoder.h | 20 +++ decoders/swig/decoder_utils.cpp | 65 +++++++ decoders/swig/decoder_utils.h | 39 +++-- decoders/swig/decoders.i | 6 +- decoders/swig/path_trie.h | 9 +- decoders/swig/scorer.cpp | 42 +++-- decoders/swig/scorer.h | 35 ++-- decoders/swig/setup.py | 13 +- decoders/swig/setup.sh | 2 +- decoders/swig_wrapper.py | 22 +-- examples/tiny/run_infer.sh | 6 +- examples/tiny/run_infer_golden.sh | 6 +- examples/tiny/run_test.sh | 6 +- examples/tiny/run_test_golden.sh | 6 +- infer.py | 1 + model_utils/model.py | 25 ++- test.py | 1 + 23 files changed, 310 insertions(+), 239 deletions(-) rename decoders/{decoder_deprecated.py => decoders_deprecated.py} (98%) rename decoders/{lm_scorer_deprecated.py => scorer_deprecated.py} (100%) rename decoders/swig/{ctc_decoders.cpp => ctc_beam_search_decoder.cpp} (55%) rename decoders/swig/{ctc_decoders.h => ctc_beam_search_decoder.h} (75%) create mode 100644 decoders/swig/ctc_greedy_decoder.cpp create mode 100644 decoders/swig/ctc_greedy_decoder.h diff --git a/README.md b/README.md index 758799716..9d9d4c77e 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,6 @@ Data augmentation has often been a highly effective technique to boost the deep Six optional augmentation components are provided to be selected, configured and inserted into the processing pipeline. -### Inference - Volume Perturbation - Speed Perturbation - Shifting Perturbation diff --git a/decoders/decoder_deprecated.py b/decoders/decoders_deprecated.py similarity index 98% rename from decoders/decoder_deprecated.py rename to decoders/decoders_deprecated.py index 647431632..17b28b0d0 100644 --- a/decoders/decoder_deprecated.py +++ b/decoders/decoders_deprecated.py @@ -119,7 +119,7 @@ def ctc_beam_search_decoder(probs_seq, cutoff_len += 1 if cum_prob >= cutoff_prob: break - cutoff_len = min(cutoff_top_n, cutoff_top_n) + cutoff_len = min(cutoff_len, cutoff_top_n) prob_idx = prob_idx[0:cutoff_len] for l in prefix_set_prev: @@ -228,8 +228,8 @@ def ctc_beam_search_decoder_batch(probs_split, pool = multiprocessing.Pool(processes=num_processes) results = [] for i, probs_list in enumerate(probs_split): - args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, - cutoff_top_n, None, nproc) + args = (probs_list, beam_size, vocabulary, cutoff_prob, cutoff_top_n, + None, nproc) results.append(pool.apply_async(ctc_beam_search_decoder, args)) pool.close() diff --git a/decoders/lm_scorer_deprecated.py b/decoders/scorer_deprecated.py similarity index 100% rename from decoders/lm_scorer_deprecated.py rename to decoders/scorer_deprecated.py diff --git a/decoders/swig/ctc_decoders.cpp b/decoders/swig/ctc_beam_search_decoder.cpp similarity index 55% rename from decoders/swig/ctc_decoders.cpp rename to decoders/swig/ctc_beam_search_decoder.cpp index 35425fbca..36d169871 100644 --- a/decoders/swig/ctc_decoders.cpp +++ b/decoders/swig/ctc_beam_search_decoder.cpp @@ -1,4 +1,4 @@ -#include "ctc_decoders.h" +#include "ctc_beam_search_decoder.h" #include #include @@ -9,59 +9,19 @@ #include "ThreadPool.h" #include "fst/fstlib.h" +#include "fst/log.h" #include "decoder_utils.h" #include "path_trie.h" -std::string ctc_greedy_decoder( - const std::vector> &probs_seq, - const std::vector &vocabulary) { - // dimension check - size_t num_time_steps = 
probs_seq.size(); - for (size_t i = 0; i < num_time_steps; ++i) { - VALID_CHECK_EQ(probs_seq[i].size(), - vocabulary.size() + 1, - "The shape of probs_seq does not match with " - "the shape of the vocabulary"); - } - - size_t blank_id = vocabulary.size(); - - std::vector max_idx_vec; - for (size_t i = 0; i < num_time_steps; ++i) { - double max_prob = 0.0; - size_t max_idx = 0; - for (size_t j = 0; j < probs_seq[i].size(); j++) { - if (max_prob < probs_seq[i][j]) { - max_idx = j; - max_prob = probs_seq[i][j]; - } - } - max_idx_vec.push_back(max_idx); - } - - std::vector idx_vec; - for (size_t i = 0; i < max_idx_vec.size(); ++i) { - if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { - idx_vec.push_back(max_idx_vec[i]); - } - } - - std::string best_path_result; - for (size_t i = 0; i < idx_vec.size(); ++i) { - if (idx_vec[i] != blank_id) { - best_path_result += vocabulary[idx_vec[i]]; - } - } - return best_path_result; -} +using FSTMATCH = fst::SortedMatcher; std::vector> ctc_beam_search_decoder( const std::vector> &probs_seq, - const size_t beam_size, + size_t beam_size, std::vector vocabulary, - const double cutoff_prob, - const size_t cutoff_top_n, + double cutoff_prob, + size_t cutoff_top_n, Scorer *ext_scorer) { // dimension check size_t num_time_steps = probs_seq.size(); @@ -80,7 +40,7 @@ std::vector> ctc_beam_search_decoder( std::find(vocabulary.begin(), vocabulary.end(), " "); int space_id = it - vocabulary.begin(); // if no space in vocabulary - if (space_id >= vocabulary.size()) { + if ((size_t)space_id >= vocabulary.size()) { space_id = -2; } @@ -90,30 +50,17 @@ std::vector> ctc_beam_search_decoder( std::vector prefixes; prefixes.push_back(&root); - if (ext_scorer != nullptr) { - if (ext_scorer->is_char_map_empty()) { - ext_scorer->set_char_map(vocabulary); - } - if (!ext_scorer->is_character_based()) { - if (ext_scorer->dictionary == nullptr) { - // fill dictionary for fst with space - ext_scorer->fill_dictionary(true); - } - auto fst_dict = static_cast(ext_scorer->dictionary); - fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); - root.set_dictionary(dict_ptr); - auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); - root.set_matcher(matcher); - } + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { + auto fst_dict = static_cast(ext_scorer->dictionary); + fst::StdVectorFst *dict_ptr = fst_dict->Copy(true); + root.set_dictionary(dict_ptr); + auto matcher = std::make_shared(*dict_ptr, fst::MATCH_INPUT); + root.set_matcher(matcher); } // prefix search over time - for (size_t time_step = 0; time_step < num_time_steps; time_step++) { - std::vector prob = probs_seq[time_step]; - std::vector> prob_idx; - for (size_t i = 0; i < prob.size(); ++i) { - prob_idx.push_back(std::pair(i, prob[i])); - } + for (size_t time_step = 0; time_step < num_time_steps; ++time_step) { + auto &prob = probs_seq[time_step]; float min_cutoff = -NUM_FLT_INF; bool full_beam = false; @@ -121,43 +68,20 @@ std::vector> ctc_beam_search_decoder( size_t num_prefixes = std::min(prefixes.size(), beam_size); std::sort( prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); - min_cutoff = prefixes[num_prefixes - 1]->score + log(prob[blank_id]) - - std::max(0.0, ext_scorer->beta); + min_cutoff = prefixes[num_prefixes - 1]->score + + std::log(prob[blank_id]) - std::max(0.0, ext_scorer->beta); full_beam = (num_prefixes == beam_size); } - // pruning of vacobulary - size_t cutoff_len = prob.size(); - if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) { - std::sort( - 
prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); - if (cutoff_prob < 1.0) { - double cum_prob = 0.0; - cutoff_len = 0; - for (size_t i = 0; i < prob_idx.size(); ++i) { - cum_prob += prob_idx[i].second; - cutoff_len += 1; - if (cum_prob >= cutoff_prob) break; - } - } - cutoff_len = std::min(cutoff_len, cutoff_top_n); - prob_idx = std::vector>( - prob_idx.begin(), prob_idx.begin() + cutoff_len); - } - std::vector> log_prob_idx; - for (size_t i = 0; i < cutoff_len; ++i) { - log_prob_idx.push_back(std::pair( - prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); - } - + std::vector> log_prob_idx = + get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n); // loop over chars for (size_t index = 0; index < log_prob_idx.size(); index++) { auto c = log_prob_idx[index].first; - float log_prob_c = log_prob_idx[index].second; + auto log_prob_c = log_prob_idx[index].second; for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) { auto prefix = prefixes[i]; - if (full_beam && log_prob_c + prefix->score < min_cutoff) { break; } @@ -189,7 +113,6 @@ std::vector> ctc_beam_search_decoder( if (ext_scorer != nullptr && (c == space_id || ext_scorer->is_character_based())) { PathTrie *prefix_toscore = nullptr; - // skip scoring the space if (ext_scorer->is_character_based()) { prefix_toscore = prefix_new; @@ -201,7 +124,6 @@ std::vector> ctc_beam_search_decoder( std::vector ngram; ngram = ext_scorer->make_ngram(prefix_toscore); score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; - log_p += score; log_p += ext_scorer->beta; } @@ -221,57 +143,33 @@ std::vector> ctc_beam_search_decoder( prefixes.begin() + beam_size, prefixes.end(), prefix_compare); - for (size_t i = beam_size; i < prefixes.size(); ++i) { prefixes[i]->remove(); } } } // end of loop over time - // compute aproximate ctc score as the return score + // compute aproximate ctc score as the return score, without affecting the + // return order of decoding result. To delete when decoder gets stable. 
for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { double approx_ctc = prefixes[i]->score; - if (ext_scorer != nullptr) { std::vector output; prefixes[i]->get_path_vec(output); - size_t prefix_length = output.size(); + auto prefix_length = output.size(); auto words = ext_scorer->split_labels(output); // remove word insert approx_ctc = approx_ctc - prefix_length * ext_scorer->beta; // remove language model weight: approx_ctc -= (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha; } - prefixes[i]->approx_ctc = approx_ctc; } - // allow for the post processing - std::vector space_prefixes; - if (space_prefixes.empty()) { - for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { - space_prefixes.push_back(prefixes[i]); - } - } - - std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); - std::vector> output_vecs; - for (size_t i = 0; i < beam_size && i < space_prefixes.size(); ++i) { - std::vector output; - space_prefixes[i]->get_path_vec(output); - // convert index to string - std::string output_str; - for (size_t j = 0; j < output.size(); j++) { - output_str += vocabulary[output[j]]; - } - std::pair output_pair(-space_prefixes[i]->approx_ctc, - output_str); - output_vecs.emplace_back(output_pair); - } - - return output_vecs; + return get_beam_search_result(prefixes, vocabulary, beam_size); } + std::vector>> ctc_beam_search_decoder_batch( const std::vector>> &probs_split, @@ -287,18 +185,6 @@ ctc_beam_search_decoder_batch( // number of samples size_t batch_size = probs_split.size(); - // scorer filling up - if (ext_scorer != nullptr) { - if (ext_scorer->is_char_map_empty()) { - ext_scorer->set_char_map(vocabulary); - } - if (!ext_scorer->is_character_based() && - ext_scorer->dictionary == nullptr) { - // init dictionary - ext_scorer->fill_dictionary(true); - } - } - // enqueue the tasks of decoding std::vector>>> res; for (size_t i = 0; i < batch_size; ++i) { diff --git a/decoders/swig/ctc_decoders.h b/decoders/swig/ctc_beam_search_decoder.h similarity index 75% rename from decoders/swig/ctc_decoders.h rename to decoders/swig/ctc_beam_search_decoder.h index 6384c8a8f..c800384e5 100644 --- a/decoders/swig/ctc_decoders.h +++ b/decoders/swig/ctc_beam_search_decoder.h @@ -7,19 +7,6 @@ #include "scorer.h" -/* CTC Best Path Decoder - * - * Parameters: - * probs_seq: 2-D vector that each element is a vector of probabilities - * over vocabulary of one time step. - * vocabulary: A vector of vocabulary. 
- * Return: - * The decoding result in string - */ -std::string ctc_greedy_decoder( - const std::vector> &probs_seq, - const std::vector &vocabulary); - /* CTC Beam Search Decoder * Parameters: @@ -38,11 +25,11 @@ std::string ctc_greedy_decoder( */ std::vector> ctc_beam_search_decoder( const std::vector> &probs_seq, - const size_t beam_size, + size_t beam_size, std::vector vocabulary, - const double cutoff_prob = 1.0, - const size_t cutoff_top_n = 40, - Scorer *ext_scorer = NULL); + double cutoff_prob = 1.0, + size_t cutoff_top_n = 40, + Scorer *ext_scorer = nullptr); /* CTC Beam Search Decoder for batch data @@ -65,11 +52,11 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( const std::vector>> &probs_split, - const size_t beam_size, + size_t beam_size, const std::vector &vocabulary, - const size_t num_processes, + size_t num_processes, double cutoff_prob = 1.0, - const size_t cutoff_top_n = 40, - Scorer *ext_scorer = NULL); + size_t cutoff_top_n = 40, + Scorer *ext_scorer = nullptr); #endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/decoders/swig/ctc_greedy_decoder.cpp b/decoders/swig/ctc_greedy_decoder.cpp new file mode 100644 index 000000000..c4c94539e --- /dev/null +++ b/decoders/swig/ctc_greedy_decoder.cpp @@ -0,0 +1,45 @@ +#include "ctc_greedy_decoder.h" +#include "decoder_utils.h" + +std::string ctc_greedy_decoder( + const std::vector> &probs_seq, + const std::vector &vocabulary) { + // dimension check + size_t num_time_steps = probs_seq.size(); + for (size_t i = 0; i < num_time_steps; ++i) { + VALID_CHECK_EQ(probs_seq[i].size(), + vocabulary.size() + 1, + "The shape of probs_seq does not match with " + "the shape of the vocabulary"); + } + + size_t blank_id = vocabulary.size(); + + std::vector max_idx_vec(num_time_steps, 0); + std::vector idx_vec; + for (size_t i = 0; i < num_time_steps; ++i) { + double max_prob = 0.0; + size_t max_idx = 0; + const std::vector &probs_step = probs_seq[i]; + for (size_t j = 0; j < probs_step.size(); ++j) { + if (max_prob < probs_step[j]) { + max_idx = j; + max_prob = probs_step[j]; + } + } + // id with maximum probability in current step + max_idx_vec[i] = max_idx; + // deduplicate + if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { + idx_vec.push_back(max_idx_vec[i]); + } + } + + std::string best_path_result; + for (size_t i = 0; i < idx_vec.size(); ++i) { + if (idx_vec[i] != blank_id) { + best_path_result += vocabulary[idx_vec[i]]; + } + } + return best_path_result; +} diff --git a/decoders/swig/ctc_greedy_decoder.h b/decoders/swig/ctc_greedy_decoder.h new file mode 100644 index 000000000..043742f26 --- /dev/null +++ b/decoders/swig/ctc_greedy_decoder.h @@ -0,0 +1,20 @@ +#ifndef CTC_GREEDY_DECODER_H +#define CTC_GREEDY_DECODER_H + +#include +#include + +/* CTC Greedy (Best Path) Decoder + * + * Parameters: + * probs_seq: 2-D vector that each element is a vector of probabilities + * over vocabulary of one time step. + * vocabulary: A vector of vocabulary. 
+ * Return: + * The decoding result in string + */ +std::string ctc_greedy_decoder( + const std::vector> &probs_seq, + const std::vector &vocabulary); + +#endif // CTC_GREEDY_DECODER_H diff --git a/decoders/swig/decoder_utils.cpp b/decoders/swig/decoder_utils.cpp index 989b067e7..665fcc22f 100644 --- a/decoders/swig/decoder_utils.cpp +++ b/decoders/swig/decoder_utils.cpp @@ -4,6 +4,71 @@ #include #include +std::vector> get_pruned_log_probs( + const std::vector &prob_step, + double cutoff_prob, + size_t cutoff_top_n) { + std::vector> prob_idx; + for (size_t i = 0; i < prob_step.size(); ++i) { + prob_idx.push_back(std::pair(i, prob_step[i])); + } + // pruning of vacobulary + size_t cutoff_len = prob_step.size(); + if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) { + std::sort( + prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); + if (cutoff_prob < 1.0) { + double cum_prob = 0.0; + cutoff_len = 0; + for (size_t i = 0; i < prob_idx.size(); ++i) { + cum_prob += prob_idx[i].second; + cutoff_len += 1; + if (cum_prob >= cutoff_prob) break; + } + } + cutoff_len = std::min(cutoff_len, cutoff_top_n); + prob_idx = std::vector>( + prob_idx.begin(), prob_idx.begin() + cutoff_len); + } + std::vector> log_prob_idx; + for (size_t i = 0; i < cutoff_len; ++i) { + log_prob_idx.push_back(std::pair( + prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); + } + return log_prob_idx; +} + + +std::vector> get_beam_search_result( + const std::vector &prefixes, + const std::vector &vocabulary, + size_t beam_size) { + // allow for the post processing + std::vector space_prefixes; + if (space_prefixes.empty()) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + space_prefixes.push_back(prefixes[i]); + } + } + + std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); + std::vector> output_vecs; + for (size_t i = 0; i < beam_size && i < space_prefixes.size(); ++i) { + std::vector output; + space_prefixes[i]->get_path_vec(output); + // convert index to string + std::string output_str; + for (size_t j = 0; j < output.size(); j++) { + output_str += vocabulary[output[j]]; + } + std::pair output_pair(-space_prefixes[i]->approx_ctc, + output_str); + output_vecs.emplace_back(output_pair); + } + + return output_vecs; +} + size_t get_utf8_str_len(const std::string &str) { size_t str_len = 0; for (char c : str) { diff --git a/decoders/swig/decoder_utils.h b/decoders/swig/decoder_utils.h index 015646ddd..932ffb12f 100644 --- a/decoders/swig/decoder_utils.h +++ b/decoders/swig/decoder_utils.h @@ -3,25 +3,26 @@ #include #include "path_trie.h" +#include "fst/log.h" const float NUM_FLT_INF = std::numeric_limits::max(); const float NUM_FLT_MIN = std::numeric_limits::min(); -// check if __A == _B -#define VALID_CHECK_EQ(__A, __B, __ERR) \ - if ((__A) != (__B)) { \ - std::ostringstream str; \ - str << (__A) << " != " << (__B) << ", "; \ - throw std::runtime_error(str.str() + __ERR); \ +// inline function for validation check +inline void check( + bool x, const char *expr, const char *file, int line, const char *err) { + if (!x) { + std::cout << "[" << file << ":" << line << "] "; + LOG(FATAL) << "\"" << expr << "\" check failed. 
" << err; } +} + +#define VALID_CHECK(x, info) \ + check(static_cast(x), #x, __FILE__, __LINE__, info) +#define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info) +#define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info) +#define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info) -// check if __A > __B -#define VALID_CHECK_GT(__A, __B, __ERR) \ - if ((__A) <= (__B)) { \ - std::ostringstream str; \ - str << (__A) << " <= " << (__B) << ", "; \ - throw std::runtime_error(str.str() + __ERR); \ - } // Function template for comparing two pairs template @@ -47,6 +48,18 @@ T log_sum_exp(const T &x, const T &y) { return std::log(std::exp(x - xmax) + std::exp(y - xmax)) + xmax; } +// Get pruned probability vector for each time step's beam search +std::vector> get_pruned_log_probs( + const std::vector &prob_step, + double cutoff_prob, + size_t cutoff_top_n); + +// Get beam search result from prefixes in trie tree +std::vector> get_beam_search_result( + const std::vector &prefixes, + const std::vector &vocabulary, + size_t beam_size); + // Functor for prefix comparsion bool prefix_compare(const PathTrie *x, const PathTrie *y); diff --git a/decoders/swig/decoders.i b/decoders/swig/decoders.i index 8059199d1..4227d4a37 100644 --- a/decoders/swig/decoders.i +++ b/decoders/swig/decoders.i @@ -1,7 +1,8 @@ %module swig_decoders %{ #include "scorer.h" -#include "ctc_decoders.h" +#include "ctc_greedy_decoder.h" +#include "ctc_beam_search_decoder.h" #include "decoder_utils.h" %} @@ -28,4 +29,5 @@ namespace std { %template(DoubleStringPairCompFirstRev) pair_comp_first_rev; %include "scorer.h" -%include "ctc_decoders.h" +%include "ctc_greedy_decoder.h" +%include "ctc_beam_search_decoder.h" diff --git a/decoders/swig/path_trie.h b/decoders/swig/path_trie.h index ddeccd910..b4f5bc4ba 100644 --- a/decoders/swig/path_trie.h +++ b/decoders/swig/path_trie.h @@ -1,14 +1,13 @@ #ifndef PATH_TRIE_H #define PATH_TRIE_H -#pragma once -#include + #include #include #include #include #include -using FSTMATCH = fst::SortedMatcher; +#include "fst/fstlib.h" /* Trie tree for prefix storing and manipulating, with a dictionary in * finite-state transducer for spelling correction. 
@@ -35,7 +34,7 @@ public: // set dictionary for FST void set_dictionary(fst::StdVectorFst* dictionary); - void set_matcher(std::shared_ptr matcher); + void set_matcher(std::shared_ptr>); bool is_empty() { return _ROOT == character; } @@ -62,7 +61,7 @@ private: fst::StdVectorFst* _dictionary; fst::StdVectorFst::StateId _dictionary_state; // true if finding ars in FST - std::shared_ptr _matcher; + std::shared_ptr> _matcher; }; #endif // PATH_TRIE_H diff --git a/decoders/swig/scorer.cpp b/decoders/swig/scorer.cpp index 75919c3c9..6b2803443 100644 --- a/decoders/swig/scorer.cpp +++ b/decoders/swig/scorer.cpp @@ -13,29 +13,47 @@ using namespace lm::ngram; -Scorer::Scorer(double alpha, double beta, const std::string& lm_path) { +Scorer::Scorer(double alpha, + double beta, + const std::string& lm_path, + const std::vector& vocab_list) { this->alpha = alpha; this->beta = beta; _is_character_based = true; _language_model = nullptr; dictionary = nullptr; _max_order = 0; + _dict_size = 0; _SPACE_ID = -1; - // load language model - load_LM(lm_path.c_str()); + + setup(lm_path, vocab_list); } Scorer::~Scorer() { - if (_language_model != nullptr) + if (_language_model != nullptr) { delete static_cast(_language_model); - if (dictionary != nullptr) delete static_cast(dictionary); + } + if (dictionary != nullptr) { + delete static_cast(dictionary); + } } -void Scorer::load_LM(const char* filename) { - if (access(filename, F_OK) != 0) { - std::cerr << "Invalid language model file !!!" << std::endl; - exit(1); +void Scorer::setup(const std::string& lm_path, + const std::vector& vocab_list) { + // load language model + load_lm(lm_path); + // set char map for scorer + set_char_map(vocab_list); + // fill the dictionary for FST + if (!is_character_based()) { + fill_dictionary(true); } +} + +void Scorer::load_lm(const std::string& lm_path) { + const char* filename = lm_path.c_str(); + VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path"); + RetriveStrEnumerateVocab enumerate; lm::ngram::Config config; config.enumerate_vocab = &enumerate; @@ -180,14 +198,14 @@ void Scorer::fill_dictionary(bool add_space) { } // For each unigram convert to ints and put in trie - int vocab_size = 0; + int dict_size = 0; for (const auto& word : _vocabulary) { bool added = add_word_to_dictionary( word, char_map, add_space, _SPACE_ID, &dictionary); - vocab_size += added ? 1 : 0; + dict_size += added ? 
1 : 0; } - std::cerr << "Vocab Size " << vocab_size << std::endl; + _dict_size = dict_size; /* Simplify FST diff --git a/decoders/swig/scorer.h b/decoders/swig/scorer.h index 1b4857e38..72544da7b 100644 --- a/decoders/swig/scorer.h +++ b/decoders/swig/scorer.h @@ -40,31 +40,32 @@ public: */ class Scorer { public: - Scorer(double alpha, double beta, const std::string &lm_path); + Scorer(double alpha, + double beta, + const std::string &lm_path, + const std::vector &vocabulary); ~Scorer(); double get_log_cond_prob(const std::vector &words); double get_sent_log_prob(const std::vector &words); - size_t get_max_order() { return _max_order; } + size_t get_max_order() const { return _max_order; } - bool is_char_map_empty() { return _char_map.size() == 0; } + size_t get_dict_size() const { return _dict_size; } - bool is_character_based() { return _is_character_based; } + bool is_char_map_empty() const { return _char_map.size() == 0; } + + bool is_character_based() const { return _is_character_based; } // reset params alpha & beta void reset_params(float alpha, float beta); - // make ngram + // make ngram for a given prefix std::vector make_ngram(PathTrie *prefix); - // fill dictionary for fst - void fill_dictionary(bool add_space); - - // set char map - void set_char_map(const std::vector &char_list); - + // trransform the labels in index to the vector of words (word based lm) or + // the vector of characters (character based lm) std::vector split_labels(const std::vector &labels); // expose to decoder @@ -75,7 +76,16 @@ public: void *dictionary; protected: - void load_LM(const char *filename); + void setup(const std::string &lm_path, + const std::vector &vocab_list); + + void load_lm(const std::string &lm_path); + + // fill dictionary for fst + void fill_dictionary(bool add_space); + + // set char map + void set_char_map(const std::vector &char_list); double get_log_prob(const std::vector &words); @@ -85,6 +95,7 @@ private: void *_language_model; bool _is_character_based; size_t _max_order; + size_t _dict_size; int _SPACE_ID; std::vector _char_list; diff --git a/decoders/swig/setup.py b/decoders/swig/setup.py index 7a4b7e02c..8af9ff304 100644 --- a/decoders/swig/setup.py +++ b/decoders/swig/setup.py @@ -70,8 +70,11 @@ FILES = glob.glob('kenlm/util/*.cc') \ FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') +# FILES + glob.glob('glog/src/*.cc') FILES = [ - fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')) + fn for fn in FILES + if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith( + 'unittest.cc')) ] LIBS = ['stdc++'] @@ -99,7 +102,13 @@ decoders_module = [ name='_swig_decoders', sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'), language='c++', - include_dirs=['.', 'kenlm', 'openfst-1.6.3/src/include', 'ThreadPool'], + include_dirs=[ + '.', + 'kenlm', + 'openfst-1.6.3/src/include', + 'ThreadPool', + #'glog/src' + ], libraries=LIBS, extra_compile_args=ARGS) ] diff --git a/decoders/swig/setup.sh b/decoders/swig/setup.sh index 069f51d6e..78ae2b201 100644 --- a/decoders/swig/setup.sh +++ b/decoders/swig/setup.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash if [ ! -d kenlm ]; then git clone https://github.com/luotao1/kenlm.git diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py index 54ed249f3..5ebcd133c 100644 --- a/decoders/swig_wrapper.py +++ b/decoders/swig_wrapper.py @@ -13,14 +13,14 @@ class Scorer(swig_decoders.Scorer): language model when alpha = 0. :type alpha: float :param beta: Parameter associated with word count. 
Don't use word - count when beta = 0. + count when beta = 0. :type beta: float :model_path: Path to load language model. :type model_path: basestring """ - def __init__(self, alpha, beta, model_path): - swig_decoders.Scorer.__init__(self, alpha, beta, model_path) + def __init__(self, alpha, beta, model_path, vocabulary): + swig_decoders.Scorer.__init__(self, alpha, beta, model_path, vocabulary) def ctc_greedy_decoder(probs_seq, vocabulary): @@ -58,12 +58,12 @@ def ctc_beam_search_decoder(probs_seq, default 1.0, no pruning. :type cutoff_prob: float :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n - characters with highest probs in vocabulary will be - used in beam search, default 40. + characters with highest probs in vocabulary will be + used in beam search, default 40. :type cutoff_top_n: int :param ext_scoring_func: External scoring function for - partially decoded sentence, e.g. word count - or language model. + partially decoded sentence, e.g. word count + or language model. :type external_scoring_func: callable :return: List of tuples of log probability and sentence as decoding results, in descending order of the probability. @@ -96,14 +96,14 @@ def ctc_beam_search_decoder_batch(probs_split, default 1.0, no pruning. :type cutoff_prob: float :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n - characters with highest probs in vocabulary will be - used in beam search, default 40. + characters with highest probs in vocabulary will be + used in beam search, default 40. :type cutoff_top_n: int :param num_processes: Number of parallel processes. :type num_processes: int :param ext_scoring_func: External scoring function for - partially decoded sentence, e.g. word count - or language model. + partially decoded sentence, e.g. word count + or language model. :type external_scoring_function: callable :return: List of tuples of log probability and sentence as decoding results, in descending order of the probability. 
diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index 1d33bfbba..1e90f6081 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -21,9 +21,9 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh index 32e9d8623..40bb30337 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/tiny/run_infer_golden.sh @@ -30,9 +30,9 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index f9c3cc11c..868a045f4 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -22,9 +22,9 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 080c3c062..1a4731dd1 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -31,9 +31,9 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/infer.py b/infer.py index 1064fd25a..e635f6d0f 100644 --- a/infer.py +++ b/infer.py @@ -112,6 +112,7 @@ def infer(): print("Current error rate [%s] = %f" % (args.error_rate_type, error_rate_func(target, result))) + ds2_model.logger.info("finish inference") def main(): print_arguments(args) diff --git a/model_utils/model.py b/model_utils/model.py index 4f5021a6d..66b161c3e 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -6,6 +6,7 @@ from __future__ import print_function import sys import os import time +import logging import gzip import paddle.v2 as paddle from decoders.swig_wrapper import Scorer @@ -13,6 +14,9 @@ from decoders.swig_wrapper import ctc_greedy_decoder from decoders.swig_wrapper import ctc_beam_search_decoder_batch from model_utils.network import deep_speech_v2_network +logging.basicConfig( + format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s') + class DeepSpeech2Model(object): """DeepSpeech2Model class. 
@@ -43,6 +47,8 @@ class DeepSpeech2Model(object): self._inferer = None self._loss_inferer = None self._ext_scorer = None + self.logger = logging.getLogger("") + self.logger.setLevel(level=logging.INFO) def train(self, train_batch_reader, @@ -204,16 +210,25 @@ class DeepSpeech2Model(object): elif decoding_method == "ctc_beam_search": # initialize external scorer if self._ext_scorer == None: - self._ext_scorer = Scorer(beam_alpha, beam_beta, - language_model_path) self._loaded_lm_path = language_model_path - self._ext_scorer.set_char_map(vocab_list) - if (not self._ext_scorer.is_character_based()): - self._ext_scorer.fill_dictionary(True) + self.logger.info("begin to initialize the external scorer " + "for decoding") + self._ext_scorer = Scorer(beam_alpha, beam_beta, + language_model_path, vocab_list) + + lm_char_based = self._ext_scorer.is_character_based() + lm_max_order = self._ext_scorer.get_max_order() + lm_dict_size = self._ext_scorer.get_dict_size() + self.logger.info("language model: " + "is_character_based = %d," % lm_char_based + + " max_order = %d," % lm_max_order + + " dict_size = %d" % lm_dict_size) + self.logger.info("end initializing scorer. Start decoding ...") else: self._ext_scorer.reset_params(beam_alpha, beam_beta) assert self._loaded_lm_path == language_model_path # beam search decode + num_processes = min(num_processes, len(probs_split)) beam_search_results = ctc_beam_search_decoder_batch( probs_split=probs_split, vocabulary=vocab_list, diff --git a/test.py b/test.py index c564bb85d..40f0795a1 100644 --- a/test.py +++ b/test.py @@ -115,6 +115,7 @@ def evaluate(): print("Final error rate [%s] (%d/%d) = %f" % (args.error_rate_type, num_ins, num_ins, error_sum / num_ins)) + ds2_model.logger.info("finish evaluation") def main(): print_arguments(args) From 1b206b339001636aa0839e04a78c381534da063b Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 17 Sep 2017 19:38:33 +0800 Subject: [PATCH 198/335] fix bugs for model.py and demo_server.py. --- cloud/pcloud_submit.sh | 2 +- cloud/pcloud_train.sh | 2 +- cloud/pcloud_upload_data.sh | 2 +- deploy/demo_server.py | 2 +- examples/librispeech/run_data.sh | 2 +- examples/librispeech/run_infer.sh | 2 +- examples/librispeech/run_infer_golden.sh | 2 +- examples/librispeech/run_test.sh | 2 +- examples/librispeech/run_test_golden.sh | 2 +- examples/librispeech/run_train.sh | 2 +- examples/librispeech/run_tune.sh | 2 +- examples/mandarin/run_demo_client.sh | 2 +- examples/mandarin/run_demo_server.sh | 2 +- examples/tiny/run_data.sh | 2 +- examples/tiny/run_infer.sh | 2 +- examples/tiny/run_infer_golden.sh | 2 +- examples/tiny/run_test.sh | 2 +- examples/tiny/run_test_golden.sh | 2 +- examples/tiny/run_train.sh | 2 +- examples/tiny/run_tune.sh | 2 +- model_utils/model.py | 3 ++- models/aishell/download_model.sh | 2 +- models/librispeech/download_model.sh | 2 +- models/lm/download_lm_ch.sh | 2 +- models/lm/download_lm_en.sh | 2 +- setup.sh | 2 +- 26 files changed, 27 insertions(+), 26 deletions(-) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index 378a7c6e6..99e458db9 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash TRAIN_MANIFEST="cloud/cloud_manifests/cloud.manifest.train" DEV_MANIFEST="cloud/cloud_manifests/cloud.manifest.dev" diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index d04132f90..26e537c27 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! 
/usr/bin/env bash TRAIN_MANIFEST=$1 DEV_MANIFEST=$2 diff --git a/cloud/pcloud_upload_data.sh b/cloud/pcloud_upload_data.sh index 4ef235ef7..71bb4af19 100644 --- a/cloud/pcloud_upload_data.sh +++ b/cloud/pcloud_upload_data.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash mkdir cloud_manifests diff --git a/deploy/demo_server.py b/deploy/demo_server.py index a7157001c..7c5584191 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -100,7 +100,7 @@ class AsrRequestHandler(SocketServer.BaseRequestHandler): finish_time = time.time() print("Response Time: %f, Transcript: %s" % (finish_time - start_time, transcript)) - self.request.sendall(transcript) + self.request.sendall(transcript.encode('utf-8')) def _write_to_file(self, data): # prepare save dir and filename diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh index f65aa233b..bdd5abb58 100644 --- a/examples/librispeech/run_data.sh +++ b/examples/librispeech/run_data.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/librispeech/run_infer.sh b/examples/librispeech/run_infer.sh index 6b790502a..eb812440b 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/librispeech/run_infer.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh index 679bd1bf8..eeccfdebb 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/librispeech/run_infer_golden.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index 9709234ab..7ef06ba9f 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index a505cdc79..86fe15306 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 07575dde1..9aa5e0d16 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index 05c024bec..abc28d366 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/mandarin/run_demo_client.sh b/examples/mandarin/run_demo_client.sh index dfde20f88..bf8e54514 100644 --- a/examples/mandarin/run_demo_client.sh +++ b/examples/mandarin/run_demo_client.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/mandarin/run_demo_server.sh b/examples/mandarin/run_demo_server.sh index 703184a6b..b0d4bc7f1 100644 --- a/examples/mandarin/run_demo_server.sh +++ b/examples/mandarin/run_demo_server.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash # TODO: replace the model with a mandarin model pushd ../.. 
> /dev/null diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh index 46266daaf..a98dab214 100644 --- a/examples/tiny/run_data.sh +++ b/examples/tiny/run_data.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index 1d33bfbba..dafc99d9c 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh index 32e9d8623..66360a691 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/tiny/run_infer_golden.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index f9c3cc11c..70cf4bfe2 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 080c3c062..e188c81b3 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index 74d82712e..3c2b8a1e0 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/examples/tiny/run_tune.sh b/examples/tiny/run_tune.sh index 360c11d59..926e9f8d5 100644 --- a/examples/tiny/run_tune.sh +++ b/examples/tiny/run_tune.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash pushd ../.. > /dev/null diff --git a/model_utils/model.py b/model_utils/model.py index cf146f8ce..09ee3c761 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -7,6 +7,7 @@ import sys import os import time import gzip +from distutils.dir_util import mkpath import paddle.v2 as paddle from model_utils.lm_scorer import LmScorer from model_utils.decoder import ctc_greedy_decoder, ctc_beam_search_decoder @@ -79,7 +80,7 @@ class DeepSpeech2Model(object): """ # prepare model output directory if not os.path.exists(output_model_dir): - os.mkdir(output_model_dir) + mkpath(output_model_dir) # prepare optimizer and trainer optimizer = paddle.optimizer.Adam( diff --git a/models/aishell/download_model.sh b/models/aishell/download_model.sh index 4368ee55a..77fc84b53 100644 --- a/models/aishell/download_model.sh +++ b/models/aishell/download_model.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash source ../../utils/utility.sh diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index b5fcd7d8c..336502de8 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash source ../../utils/utility.sh diff --git a/models/lm/download_lm_ch.sh b/models/lm/download_lm_ch.sh index 7f1c47a27..46bfe9329 100644 --- a/models/lm/download_lm_ch.sh +++ b/models/lm/download_lm_ch.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! /usr/bin/env bash source ../../utils/utility.sh diff --git a/models/lm/download_lm_en.sh b/models/lm/download_lm_en.sh index e967e25dc..fbfe647e9 100644 --- a/models/lm/download_lm_en.sh +++ b/models/lm/download_lm_en.sh @@ -1,4 +1,4 @@ -#! /usr/bin/bash +#! 
/usr/bin/env bash source ../../utils/utility.sh diff --git a/setup.sh b/setup.sh index 6c8a70994..15c6e1e25 100644 --- a/setup.sh +++ b/setup.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#! /usr/bin/env bash # install python dependencies if [ -f "requirements.txt" ]; then From 3018dcb4d909ca60bab5434df4899481354fbf63 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 17 Sep 2017 21:30:59 +0800 Subject: [PATCH 199/335] format varabiables' name & add more comments --- decoders/swig/ctc_beam_search_decoder.cpp | 15 ++--- decoders/swig/ctc_beam_search_decoder.h | 9 ++- decoders/swig/path_trie.cpp | 76 ++++++++++----------- decoders/swig/path_trie.h | 16 ++--- decoders/swig/scorer.cpp | 82 +++++++++++------------ decoders/swig/scorer.h | 39 ++++++----- decoders/swig_wrapper.py | 18 ++--- 7 files changed, 129 insertions(+), 126 deletions(-) diff --git a/decoders/swig/ctc_beam_search_decoder.cpp b/decoders/swig/ctc_beam_search_decoder.cpp index 36d169871..5c8373bea 100644 --- a/decoders/swig/ctc_beam_search_decoder.cpp +++ b/decoders/swig/ctc_beam_search_decoder.cpp @@ -18,8 +18,8 @@ using FSTMATCH = fst::SortedMatcher; std::vector> ctc_beam_search_decoder( const std::vector> &probs_seq, + const std::vector &vocabulary, size_t beam_size, - std::vector vocabulary, double cutoff_prob, size_t cutoff_top_n, Scorer *ext_scorer) { @@ -36,8 +36,7 @@ std::vector> ctc_beam_search_decoder( size_t blank_id = vocabulary.size(); // assign space id - std::vector::iterator it = - std::find(vocabulary.begin(), vocabulary.end(), " "); + auto it = std::find(vocabulary.begin(), vocabulary.end(), " "); int space_id = it - vocabulary.begin(); // if no space in vocabulary if ((size_t)space_id >= vocabulary.size()) { @@ -173,11 +172,11 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( const std::vector>> &probs_split, - const size_t beam_size, const std::vector &vocabulary, - const size_t num_processes, - const double cutoff_prob, - const size_t cutoff_top_n, + size_t beam_size, + size_t num_processes, + double cutoff_prob, + size_t cutoff_top_n, Scorer *ext_scorer) { VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!"); // thread pool @@ -190,8 +189,8 @@ ctc_beam_search_decoder_batch( for (size_t i = 0; i < batch_size; ++i) { res.emplace_back(pool.enqueue(ctc_beam_search_decoder, probs_split[i], - beam_size, vocabulary, + beam_size, cutoff_prob, cutoff_top_n, ext_scorer)); diff --git a/decoders/swig/ctc_beam_search_decoder.h b/decoders/swig/ctc_beam_search_decoder.h index c800384e5..6fdd15517 100644 --- a/decoders/swig/ctc_beam_search_decoder.h +++ b/decoders/swig/ctc_beam_search_decoder.h @@ -12,8 +12,8 @@ * Parameters: * probs_seq: 2-D vector that each element is a vector of probabilities * over vocabulary of one time step. - * beam_size: The width of beam search. * vocabulary: A vector of vocabulary. + * beam_size: The width of beam search. * cutoff_prob: Cutoff probability for pruning. * cutoff_top_n: Cutoff number for pruning. * ext_scorer: External scorer to evaluate a prefix, which consists of @@ -25,8 +25,8 @@ */ std::vector> ctc_beam_search_decoder( const std::vector> &probs_seq, + const std::vector &vocabulary, size_t beam_size, - std::vector vocabulary, double cutoff_prob = 1.0, size_t cutoff_top_n = 40, Scorer *ext_scorer = nullptr); @@ -36,9 +36,8 @@ std::vector> ctc_beam_search_decoder( * Parameters: * probs_seq: 3-D vector that each element is a 2-D vector that can be used * by ctc_beam_search_decoder(). - * . - * beam_size: The width of beam search. 
* vocabulary: A vector of vocabulary. + * beam_size: The width of beam search. * num_processes: Number of threads for beam search. * cutoff_prob: Cutoff probability for pruning. * cutoff_top_n: Cutoff number for pruning. @@ -52,8 +51,8 @@ std::vector> ctc_beam_search_decoder( std::vector>> ctc_beam_search_decoder_batch( const std::vector>> &probs_split, - size_t beam_size, const std::vector &vocabulary, + size_t beam_size, size_t num_processes, double cutoff_prob = 1.0, size_t cutoff_top_n = 40, diff --git a/decoders/swig/path_trie.cpp b/decoders/swig/path_trie.cpp index 6a1f6170f..fdff32861 100644 --- a/decoders/swig/path_trie.cpp +++ b/decoders/swig/path_trie.cpp @@ -15,32 +15,32 @@ PathTrie::PathTrie() { log_prob_nb_cur = -NUM_FLT_INF; score = -NUM_FLT_INF; - _ROOT = -1; - character = _ROOT; - _exists = true; + ROOT_ = -1; + character = ROOT_; + exists_ = true; parent = nullptr; - _dictionary = nullptr; - _dictionary_state = 0; - _has_dictionary = false; - _matcher = nullptr; + dictionary_ = nullptr; + dictionary_state_ = 0; + has_dictionary_ = false; + matcher_ = nullptr; } PathTrie::~PathTrie() { - for (auto child : _children) { + for (auto child : children_) { delete child.second; } } PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { - auto child = _children.begin(); - for (child = _children.begin(); child != _children.end(); ++child) { + auto child = children_.begin(); + for (child = children_.begin(); child != children_.end(); ++child) { if (child->first == new_char) { break; } } - if (child != _children.end()) { - if (!child->second->_exists) { - child->second->_exists = true; + if (child != children_.end()) { + if (!child->second->exists_) { + child->second->exists_ = true; child->second->log_prob_b_prev = -NUM_FLT_INF; child->second->log_prob_nb_prev = -NUM_FLT_INF; child->second->log_prob_b_cur = -NUM_FLT_INF; @@ -48,47 +48,47 @@ PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { } return (child->second); } else { - if (_has_dictionary) { - _matcher->SetState(_dictionary_state); - bool found = _matcher->Find(new_char); + if (has_dictionary_) { + matcher_->SetState(dictionary_state_); + bool found = matcher_->Find(new_char); if (!found) { // Adding this character causes word outside dictionary auto FSTZERO = fst::TropicalWeight::Zero(); - auto final_weight = _dictionary->Final(_dictionary_state); + auto final_weight = dictionary_->Final(dictionary_state_); bool is_final = (final_weight != FSTZERO); if (is_final && reset) { - _dictionary_state = _dictionary->Start(); + dictionary_state_ = dictionary_->Start(); } return nullptr; } else { PathTrie* new_path = new PathTrie; new_path->character = new_char; new_path->parent = this; - new_path->_dictionary = _dictionary; - new_path->_dictionary_state = _matcher->Value().nextstate; - new_path->_has_dictionary = true; - new_path->_matcher = _matcher; - _children.push_back(std::make_pair(new_char, new_path)); + new_path->dictionary_ = dictionary_; + new_path->dictionary_state_ = matcher_->Value().nextstate; + new_path->has_dictionary_ = true; + new_path->matcher_ = matcher_; + children_.push_back(std::make_pair(new_char, new_path)); return new_path; } } else { PathTrie* new_path = new PathTrie; new_path->character = new_char; new_path->parent = this; - _children.push_back(std::make_pair(new_char, new_path)); + children_.push_back(std::make_pair(new_char, new_path)); return new_path; } } } PathTrie* PathTrie::get_path_vec(std::vector& output) { - return get_path_vec(output, _ROOT); + return get_path_vec(output, 
ROOT_); } PathTrie* PathTrie::get_path_vec(std::vector& output, int stop, size_t max_steps) { - if (character == stop || character == _ROOT || output.size() == max_steps) { + if (character == stop || character == ROOT_ || output.size() == max_steps) { std::reverse(output.begin(), output.end()); return this; } else { @@ -98,7 +98,7 @@ PathTrie* PathTrie::get_path_vec(std::vector& output, } void PathTrie::iterate_to_vec(std::vector& output) { - if (_exists) { + if (exists_) { log_prob_b_prev = log_prob_b_cur; log_prob_nb_prev = log_prob_nb_cur; @@ -108,25 +108,25 @@ void PathTrie::iterate_to_vec(std::vector& output) { score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev); output.push_back(this); } - for (auto child : _children) { + for (auto child : children_) { child.second->iterate_to_vec(output); } } void PathTrie::remove() { - _exists = false; + exists_ = false; - if (_children.size() == 0) { - auto child = parent->_children.begin(); - for (child = parent->_children.begin(); child != parent->_children.end(); + if (children_.size() == 0) { + auto child = parent->children_.begin(); + for (child = parent->children_.begin(); child != parent->children_.end(); ++child) { if (child->first == character) { - parent->_children.erase(child); + parent->children_.erase(child); break; } } - if (parent->_children.size() == 0 && !parent->_exists) { + if (parent->children_.size() == 0 && !parent->exists_) { parent->remove(); } @@ -135,12 +135,12 @@ void PathTrie::remove() { } void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) { - _dictionary = dictionary; - _dictionary_state = dictionary->Start(); - _has_dictionary = true; + dictionary_ = dictionary; + dictionary_state_ = dictionary->Start(); + has_dictionary_ = true; } using FSTMATCH = fst::SortedMatcher; void PathTrie::set_matcher(std::shared_ptr matcher) { - _matcher = matcher; + matcher_ = matcher; } diff --git a/decoders/swig/path_trie.h b/decoders/swig/path_trie.h index b4f5bc4ba..7fd715d26 100644 --- a/decoders/swig/path_trie.h +++ b/decoders/swig/path_trie.h @@ -36,7 +36,7 @@ public: void set_matcher(std::shared_ptr>); - bool is_empty() { return _ROOT == character; } + bool is_empty() { return ROOT_ == character; } // remove current path from root void remove(); @@ -51,17 +51,17 @@ public: PathTrie* parent; private: - int _ROOT; - bool _exists; - bool _has_dictionary; + int ROOT_; + bool exists_; + bool has_dictionary_; - std::vector> _children; + std::vector> children_; // pointer to dictionary of FST - fst::StdVectorFst* _dictionary; - fst::StdVectorFst::StateId _dictionary_state; + fst::StdVectorFst* dictionary_; + fst::StdVectorFst::StateId dictionary_state_; // true if finding ars in FST - std::shared_ptr> _matcher; + std::shared_ptr> matcher_; }; #endif // PATH_TRIE_H diff --git a/decoders/swig/scorer.cpp b/decoders/swig/scorer.cpp index 6b2803443..27c31fa71 100644 --- a/decoders/swig/scorer.cpp +++ b/decoders/swig/scorer.cpp @@ -19,19 +19,19 @@ Scorer::Scorer(double alpha, const std::vector& vocab_list) { this->alpha = alpha; this->beta = beta; - _is_character_based = true; - _language_model = nullptr; + is_character_based_ = true; + language_model_ = nullptr; dictionary = nullptr; - _max_order = 0; - _dict_size = 0; - _SPACE_ID = -1; + max_order_ = 0; + dict_size_ = 0; + SPACE_ID_ = -1; setup(lm_path, vocab_list); } Scorer::~Scorer() { - if (_language_model != nullptr) { - delete static_cast(_language_model); + if (language_model_ != nullptr) { + delete static_cast(language_model_); } if (dictionary != nullptr) { delete 
static_cast(dictionary); @@ -57,20 +57,20 @@ void Scorer::load_lm(const std::string& lm_path) { RetriveStrEnumerateVocab enumerate; lm::ngram::Config config; config.enumerate_vocab = &enumerate; - _language_model = lm::ngram::LoadVirtual(filename, config); - _max_order = static_cast(_language_model)->Order(); - _vocabulary = enumerate.vocabulary; - for (size_t i = 0; i < _vocabulary.size(); ++i) { - if (_is_character_based && _vocabulary[i] != UNK_TOKEN && - _vocabulary[i] != START_TOKEN && _vocabulary[i] != END_TOKEN && + language_model_ = lm::ngram::LoadVirtual(filename, config); + max_order_ = static_cast(language_model_)->Order(); + vocabulary_ = enumerate.vocabulary; + for (size_t i = 0; i < vocabulary_.size(); ++i) { + if (is_character_based_ && vocabulary_[i] != UNK_TOKEN && + vocabulary_[i] != START_TOKEN && vocabulary_[i] != END_TOKEN && get_utf8_str_len(enumerate.vocabulary[i]) > 1) { - _is_character_based = false; + is_character_based_ = false; } } } double Scorer::get_log_cond_prob(const std::vector& words) { - lm::base::Model* model = static_cast(_language_model); + lm::base::Model* model = static_cast(language_model_); double cond_prob; lm::ngram::State state, tmp_state, out_state; // avoid to inserting in begin @@ -93,11 +93,11 @@ double Scorer::get_log_cond_prob(const std::vector& words) { double Scorer::get_sent_log_prob(const std::vector& words) { std::vector sentence; if (words.size() == 0) { - for (size_t i = 0; i < _max_order; ++i) { + for (size_t i = 0; i < max_order_; ++i) { sentence.push_back(START_TOKEN); } } else { - for (size_t i = 0; i < _max_order - 1; ++i) { + for (size_t i = 0; i < max_order_ - 1; ++i) { sentence.push_back(START_TOKEN); } sentence.insert(sentence.end(), words.begin(), words.end()); @@ -107,11 +107,11 @@ double Scorer::get_sent_log_prob(const std::vector& words) { } double Scorer::get_log_prob(const std::vector& words) { - assert(words.size() > _max_order); + assert(words.size() > max_order_); double score = 0.0; - for (size_t i = 0; i < words.size() - _max_order + 1; ++i) { + for (size_t i = 0; i < words.size() - max_order_ + 1; ++i) { std::vector ngram(words.begin() + i, - words.begin() + i + _max_order); + words.begin() + i + max_order_); score += get_log_cond_prob(ngram); } return score; @@ -125,7 +125,7 @@ void Scorer::reset_params(float alpha, float beta) { std::string Scorer::vec2str(const std::vector& input) { std::string word; for (auto ind : input) { - word += _char_list[ind]; + word += char_list_[ind]; } return word; } @@ -135,7 +135,7 @@ std::vector Scorer::split_labels(const std::vector& labels) { std::string s = vec2str(labels); std::vector words; - if (_is_character_based) { + if (is_character_based_) { words = split_utf8_str(s); } else { words = split_str(s, " "); @@ -144,15 +144,15 @@ std::vector Scorer::split_labels(const std::vector& labels) { } void Scorer::set_char_map(const std::vector& char_list) { - _char_list = char_list; - _char_map.clear(); - - for (unsigned int i = 0; i < _char_list.size(); i++) { - if (_char_list[i] == " ") { - _SPACE_ID = i; - _char_map[' '] = i; - } else if (_char_list[i].size() == 1) { - _char_map[_char_list[i][0]] = i; + char_list_ = char_list; + char_map_.clear(); + + for (size_t i = 0; i < char_list_.size(); i++) { + if (char_list_[i] == " ") { + SPACE_ID_ = i; + char_map_[' '] = i; + } else if (char_list_[i].size() == 1) { + char_map_[char_list_[i][0]] = i; } } } @@ -162,14 +162,14 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { PathTrie* current_node = prefix; PathTrie* new_node = 
nullptr; - for (int order = 0; order < _max_order; order++) { + for (int order = 0; order < max_order_; order++) { std::vector prefix_vec; - if (_is_character_based) { - new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID, 1); + if (is_character_based_) { + new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1); current_node = new_node; } else { - new_node = current_node->get_path_vec(prefix_vec, _SPACE_ID); + new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_); current_node = new_node->parent; // Skipping spaces } @@ -179,7 +179,7 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { if (new_node->character == -1) { // No more spaces, but still need order - for (int i = 0; i < _max_order - order - 1; i++) { + for (int i = 0; i < max_order_ - order - 1; i++) { ngram.push_back(START_TOKEN); } break; @@ -193,19 +193,19 @@ void Scorer::fill_dictionary(bool add_space) { fst::StdVectorFst dictionary; // First reverse char_list so ints can be accessed by chars std::unordered_map char_map; - for (unsigned int i = 0; i < _char_list.size(); i++) { - char_map[_char_list[i]] = i; + for (size_t i = 0; i < char_list_.size(); i++) { + char_map[char_list_[i]] = i; } // For each unigram convert to ints and put in trie int dict_size = 0; - for (const auto& word : _vocabulary) { + for (const auto& word : vocabulary_) { bool added = add_word_to_dictionary( - word, char_map, add_space, _SPACE_ID, &dictionary); + word, char_map, add_space, SPACE_ID_, &dictionary); dict_size += added ? 1 : 0; } - _dict_size = dict_size; + dict_size_ = dict_size; /* Simplify FST diff --git a/decoders/swig/scorer.h b/decoders/swig/scorer.h index 72544da7b..618364635 100644 --- a/decoders/swig/scorer.h +++ b/decoders/swig/scorer.h @@ -18,7 +18,7 @@ const std::string START_TOKEN = ""; const std::string UNK_TOKEN = ""; const std::string END_TOKEN = ""; -// Implement a callback to retrive string vocabulary. +// Implement a callback to retrive the dictionary of language model. 
class RetriveStrEnumerateVocab : public lm::EnumerateVocab { public: RetriveStrEnumerateVocab() {} @@ -50,13 +50,14 @@ public: double get_sent_log_prob(const std::vector &words); - size_t get_max_order() const { return _max_order; } + // return the max order + size_t get_max_order() const { return max_order_; } - size_t get_dict_size() const { return _dict_size; } + // return the dictionary size of language model + size_t get_dict_size() const { return dict_size_; } - bool is_char_map_empty() const { return _char_map.size() == 0; } - - bool is_character_based() const { return _is_character_based; } + // retrun true if the language model is character based + bool is_character_based() const { return is_character_based_; } // reset params alpha & beta void reset_params(float alpha, float beta); @@ -68,20 +69,23 @@ public: // the vector of characters (character based lm) std::vector split_labels(const std::vector &labels); - // expose to decoder + // language model weight double alpha; + // word insertion weight double beta; - // fst dictionary + // pointer to the dictionary of FST void *dictionary; protected: + // necessary setup: load language model, set char map, fill FST's dictionary void setup(const std::string &lm_path, const std::vector &vocab_list); + // load language model from given path void load_lm(const std::string &lm_path); - // fill dictionary for fst + // fill dictionary for FST void fill_dictionary(bool add_space); // set char map @@ -89,19 +93,20 @@ protected: double get_log_prob(const std::vector &words); + // translate the vector in index to string std::string vec2str(const std::vector &input); private: - void *_language_model; - bool _is_character_based; - size_t _max_order; - size_t _dict_size; + void *language_model_; + bool is_character_based_; + size_t max_order_; + size_t dict_size_; - int _SPACE_ID; - std::vector _char_list; - std::unordered_map _char_map; + int SPACE_ID_; + std::vector char_list_; + std::unordered_map char_map_; - std::vector _vocabulary; + std::vector vocabulary_; }; #endif // SCORER_H_ diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py index 5ebcd133c..0a9211258 100644 --- a/decoders/swig_wrapper.py +++ b/decoders/swig_wrapper.py @@ -39,8 +39,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary): def ctc_beam_search_decoder(probs_seq, - beam_size, vocabulary, + beam_size, cutoff_prob=1.0, cutoff_top_n=40, ext_scoring_func=None): @@ -50,10 +50,10 @@ def ctc_beam_search_decoder(probs_seq, step, with each element being a list of normalized probabilities over vocabulary and blank. :type probs_seq: 2-D list - :param beam_size: Width for beam search. - :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list + :param beam_size: Width for beam search. + :type beam_size: int :param cutoff_prob: Cutoff probability in pruning, default 1.0, no pruning. :type cutoff_prob: float @@ -69,14 +69,14 @@ def ctc_beam_search_decoder(probs_seq, results, in descending order of the probability. 
:rtype: list """ - return swig_decoders.ctc_beam_search_decoder(probs_seq.tolist(), beam_size, - vocabulary, cutoff_prob, + return swig_decoders.ctc_beam_search_decoder(probs_seq.tolist(), vocabulary, + beam_size, cutoff_prob, cutoff_top_n, ext_scoring_func) def ctc_beam_search_decoder_batch(probs_split, - beam_size, vocabulary, + beam_size, num_processes, cutoff_prob=1.0, cutoff_top_n=40, @@ -86,10 +86,10 @@ def ctc_beam_search_decoder_batch(probs_split, :param probs_seq: 3-D list with each element as an instance of 2-D list of probabilities used by ctc_beam_search_decoder(). :type probs_seq: 3-D list - :param beam_size: Width for beam search. - :type beam_size: int :param vocabulary: Vocabulary list. :type vocabulary: list + :param beam_size: Width for beam search. + :type beam_size: int :param num_processes: Number of parallel processes. :type num_processes: int :param cutoff_prob: Cutoff probability in vocabulary pruning, @@ -112,5 +112,5 @@ def ctc_beam_search_decoder_batch(probs_split, probs_split = [probs_seq.tolist() for probs_seq in probs_split] return swig_decoders.ctc_beam_search_decoder_batch( - probs_split, beam_size, vocabulary, num_processes, cutoff_prob, + probs_split, vocabulary, beam_size, num_processes, cutoff_prob, cutoff_top_n, ext_scoring_func) From bdfef747e60b56f61247fc287507667437cf5206 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 18 Sep 2017 13:19:02 +0800 Subject: [PATCH 200/335] adjust to pass ci --- decoders/swig/ctc_beam_search_decoder.cpp | 3 +-- decoders/swig/ctc_greedy_decoder.cpp | 2 +- decoders/swig/ctc_greedy_decoder.h | 4 ++-- decoders/swig/decoder_utils.cpp | 3 +-- decoders/swig/decoder_utils.h | 2 +- decoders/swig/path_trie.cpp | 2 ++ decoders/swig/scorer.cpp | 4 +++- 7 files changed, 11 insertions(+), 9 deletions(-) diff --git a/decoders/swig/ctc_beam_search_decoder.cpp b/decoders/swig/ctc_beam_search_decoder.cpp index 5c8373bea..624784b05 100644 --- a/decoders/swig/ctc_beam_search_decoder.cpp +++ b/decoders/swig/ctc_beam_search_decoder.cpp @@ -9,7 +9,6 @@ #include "ThreadPool.h" #include "fst/fstlib.h" -#include "fst/log.h" #include "decoder_utils.h" #include "path_trie.h" @@ -130,7 +129,7 @@ std::vector> ctc_beam_search_decoder( log_sum_exp(prefix_new->log_prob_nb_cur, log_p); } } // end of loop over prefix - } // end of loop over chars + } // end of loop over vocabulary prefixes.clear(); // update log probs diff --git a/decoders/swig/ctc_greedy_decoder.cpp b/decoders/swig/ctc_greedy_decoder.cpp index c4c94539e..03449d739 100644 --- a/decoders/swig/ctc_greedy_decoder.cpp +++ b/decoders/swig/ctc_greedy_decoder.cpp @@ -27,7 +27,7 @@ std::string ctc_greedy_decoder( max_prob = probs_step[j]; } } - // id with maximum probability in current step + // id with maximum probability in current time step max_idx_vec[i] = max_idx; // deduplicate if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { diff --git a/decoders/swig/ctc_greedy_decoder.h b/decoders/swig/ctc_greedy_decoder.h index 043742f26..5e64f692e 100644 --- a/decoders/swig/ctc_greedy_decoder.h +++ b/decoders/swig/ctc_greedy_decoder.h @@ -14,7 +14,7 @@ * The decoding result in string */ std::string ctc_greedy_decoder( - const std::vector> &probs_seq, - const std::vector &vocabulary); + const std::vector>& probs_seq, + const std::vector& vocabulary); #endif // CTC_GREEDY_DECODER_H diff --git a/decoders/swig/decoder_utils.cpp b/decoders/swig/decoder_utils.cpp index 665fcc22f..70a159288 100644 --- a/decoders/swig/decoder_utils.cpp +++ b/decoders/swig/decoder_utils.cpp @@ -23,10 
+23,9 @@ std::vector> get_pruned_log_probs( for (size_t i = 0; i < prob_idx.size(); ++i) { cum_prob += prob_idx[i].second; cutoff_len += 1; - if (cum_prob >= cutoff_prob) break; + if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n) break; } } - cutoff_len = std::min(cutoff_len, cutoff_top_n); prob_idx = std::vector>( prob_idx.begin(), prob_idx.begin() + cutoff_len); } diff --git a/decoders/swig/decoder_utils.h b/decoders/swig/decoder_utils.h index 932ffb12f..72821c187 100644 --- a/decoders/swig/decoder_utils.h +++ b/decoders/swig/decoder_utils.h @@ -2,8 +2,8 @@ #define DECODER_UTILS_H_ #include -#include "path_trie.h" #include "fst/log.h" +#include "path_trie.h" const float NUM_FLT_INF = std::numeric_limits::max(); const float NUM_FLT_MIN = std::numeric_limits::min(); diff --git a/decoders/swig/path_trie.cpp b/decoders/swig/path_trie.cpp index fdff32861..40d909705 100644 --- a/decoders/swig/path_trie.cpp +++ b/decoders/swig/path_trie.cpp @@ -19,9 +19,11 @@ PathTrie::PathTrie() { character = ROOT_; exists_ = true; parent = nullptr; + dictionary_ = nullptr; dictionary_state_ = 0; has_dictionary_ = false; + matcher_ = nullptr; } diff --git a/decoders/swig/scorer.cpp b/decoders/swig/scorer.cpp index 27c31fa71..686c67c77 100644 --- a/decoders/swig/scorer.cpp +++ b/decoders/swig/scorer.cpp @@ -19,9 +19,11 @@ Scorer::Scorer(double alpha, const std::vector& vocab_list) { this->alpha = alpha; this->beta = beta; + + dictionary = nullptr; is_character_based_ = true; language_model_ = nullptr; - dictionary = nullptr; + max_order_ = 0; dict_size_ = 0; SPACE_ID_ = -1; From 6db33ff194392576a46420c17d70ece37e6953ff Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 18 Sep 2017 00:27:02 +0800 Subject: [PATCH 201/335] Bug fixed for cloud training for DS2. --- cloud/pcloud_train.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cloud/pcloud_train.sh b/cloud/pcloud_train.sh index d04132f90..804f606a2 100644 --- a/cloud/pcloud_train.sh +++ b/cloud/pcloud_train.sh @@ -15,6 +15,8 @@ python ./cloud/split_data.py \ --in_manifest_path=${DEV_MANIFEST} \ --out_manifest_path='/local.manifest.dev' +mkdir ./logs + python -u train.py \ --batch_size=${BATCH_SIZE} \ --trainer_count=${NUM_GPU} \ @@ -35,10 +37,10 @@ python -u train.py \ --train_manifest='/local.manifest.train' \ --dev_manifest='/local.manifest.dev' \ --mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/eng_vocab.txt' \ +--vocab_path='data/librispeech/vocab.txt' \ --output_model_dir='./checkpoints' \ --output_model_dir=${MODEL_PATH} \ --augment_conf_path='conf/augmentation.config' \ --specgram_type='linear' \ --shuffle_method='batch_shuffle_clipped' \ -2>&1 | tee ./log/train.log +2>&1 | tee ./logs/train.log From e92d01e56250c66cfd583e9e2ae1049c2b40e939 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 18 Sep 2017 16:20:57 +0800 Subject: [PATCH 202/335] disable the make output of libsndfile in setup --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 209539399..894aaea98 100644 --- a/setup.sh +++ b/setup.sh @@ -20,7 +20,7 @@ if [ $? != 0 ]; then fi tar -zxvf libsndfile-1.0.28.tar.gz cd libsndfile-1.0.28 - ./configure && make && make install + ./configure > /dev/null && make > /dev/null && make install > /dev/null cd .. 
rm -rf libsndfile-1.0.28 rm libsndfile-1.0.28.tar.gz From 7f45752a13c62770994db7b554cdf71e7abf424b Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 18 Sep 2017 17:03:08 +0800 Subject: [PATCH 203/335] Add profile.sh script for multi-gpu profiling. --- examples/librispeech/run_train.sh | 1 + examples/tiny/run_train.sh | 1 + model_utils/model.py | 18 +++++++++++++----- tools/profile.sh | 30 ++++++++++++++++++++++++++++++ train.py | 4 +++- 5 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 tools/profile.sh diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 9aa5e0d16..1d18f29ef 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -17,6 +17,7 @@ python -u train.py \ --learning_rate=5e-4 \ --max_duration=27.0 \ --min_duration=0.0 \ +--test_off=False \ --use_sortagrad=True \ --use_gru=False \ --use_gpu=True \ diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index 3c2b8a1e0..957aa63bc 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -17,6 +17,7 @@ python -u train.py \ --learning_rate=1e-5 \ --max_duration=27.0 \ --min_duration=0.0 \ +--test_off=False \ --use_sortagrad=True \ --use_gru=False \ --use_gpu=True \ diff --git a/model_utils/model.py b/model_utils/model.py index 09ee3c761..a7c08ba5e 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -54,7 +54,8 @@ class DeepSpeech2Model(object): num_passes, output_model_dir, is_local=True, - num_iterations_print=100): + num_iterations_print=100, + test_off=False): """Train the model. :param train_batch_reader: Train data reader. @@ -77,6 +78,8 @@ class DeepSpeech2Model(object): :type is_local: bool :param output_model_dir: Directory for saving the model (every pass). :type output_model_dir: basestring + :param test_off: Turn off testing. + :type test_off: bool """ # prepare model output directory if not os.path.exists(output_model_dir): @@ -114,14 +117,19 @@ class DeepSpeech2Model(object): start_time = time.time() cost_sum, cost_counter = 0.0, 0 if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=dev_batch_reader, feeding=feeding_dict) + if test_off: + print("\n------- Time: %d sec, Pass: %d" % + (time.time() - start_time, event.pass_id)) + else: + result = trainer.test( + reader=dev_batch_reader, feeding=feeding_dict) + print("\n------- Time: %d sec, Pass: %d, " + "ValidationCost: %s" % + (time.time() - start_time, event.pass_id, 0)) output_model_path = os.path.join( output_model_dir, "params.pass-%d.tar.gz" % event.pass_id) with gzip.open(output_model_path, 'w') as f: self._parameters.to_tar(f) - print("\n------- Time: %d sec, Pass: %d, ValidationCost: %s" % - (time.time() - start_time, event.pass_id, result.cost)) # run train trainer.train( diff --git a/tools/profile.sh b/tools/profile.sh new file mode 100644 index 000000000..19abe7ede --- /dev/null +++ b/tools/profile.sh @@ -0,0 +1,30 @@ +#! /usr/bin/env bash + +BATCH_SIZE_PER_GPU=64 +MIN_DURATION=6.0 +MAX_DURATION=7.0 + +function join_by { local IFS="$1"; shift; echo "$*"; } + +for NUM_GPUS in 16 8 4 2 1 +do + DEVICES=$(join_by , $(seq 0 $(($NUM_GPUS-1)))) + BATCH_SIZE=$(($BATCH_SIZE_PER_GPU * $NUM_GPUS)) + + CUDA_VISIBLE_DEVICES=$DEVICES \ + python train.py \ + --batch_size=$BATCH_SIZE \ + --num_passes=1 \ + --test_off=True \ + --trainer_count=$NUM_GPUS \ + --min_duration=$MIN_DURATION \ + --max_duration=$MAX_DURATION > tmp.log 2>&1 + + if [ $? 
-ne 0 ];then + exit 1 + fi + + cat tmp.log | grep "Time" | awk '{print "GPU Num: " "'"$NUM_GPUS"'" " Time: "$3}' + + rm tmp.log +done diff --git a/train.py b/train.py index 406484a18..445f3d765 100644 --- a/train.py +++ b/train.py @@ -25,6 +25,7 @@ add_arg('num_iter_print', int, 100, "Every # iterations for printing " add_arg('learning_rate', float, 5e-4, "Learning rate.") add_arg('max_duration', float, 27.0, "Longest audio duration allowed.") add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.") +add_arg('test_off', bool, False, "Turn off testing.") add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") @@ -111,7 +112,8 @@ def train(): num_passes=args.num_passes, num_iterations_print=args.num_iter_print, output_model_dir=args.output_model_dir, - is_local=args.is_local) + is_local=args.is_local, + test_off=args.test_off) def main(): From 1471103daa91d0e0e47377416109f17104a3141f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 18 Sep 2017 19:32:03 +0800 Subject: [PATCH 204/335] use cd instead of pushd in setup.sh --- setup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.sh b/setup.sh index 894aaea98..7c40415db 100644 --- a/setup.sh +++ b/setup.sh @@ -1,4 +1,4 @@ -#! /usr/bin/env bash +#! /usr/bin/env bash # install python dependencies if [ -f "requirements.txt" ]; then @@ -29,9 +29,9 @@ fi # install decoders python -c "import swig_decoders" if [ $? != 0 ]; then - pushd decoders/swig > /dev/null + cd decoders/swig > /dev/null sh setup.sh - popd > /dev/null + cd - > /dev/null fi From e8dce3a98233c80c3e8cf3e8781a21e6aae79568 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 18 Sep 2017 20:38:06 +0800 Subject: [PATCH 205/335] Add README doc section of multi-gpu acceleration. --- README.md | 18 +++++++++++++++--- docs/images/multi_gpu_speedup.png | Bin 0 -> 156739 bytes 2 files changed, 15 insertions(+), 3 deletions(-) create mode 100755 docs/images/multi_gpu_speedup.png diff --git a/README.md b/README.md index 4080476b4..9e9113d84 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,8 @@ - [Hyper-parameters Tuning](#hyper-parameters-tuning) - [Training for Mandarin Language](#training-for-mandarin-language) - [Trying Live Demo with Your Own Voice](#trying-live-demo-with-your-own-voice) -- [Experiments and Benchmarks](#experiments-and-benchmarks) - [Released Models](#released-models) +- [Experiments and Benchmarks](#experiments-and-benchmarks) - [Questions and Help](#questions-and-help) ## Prerequisites @@ -466,9 +466,21 @@ Test Set | Aishell Model | Internal Mandarin Model Aishell-Test | X.X | X.X Baidu-Mandarin-Test | X.X | X.X -#### Multiple GPU Efficiency +#### Acceleration with Multi-GPUs + +We compare the training time with 1, 2, 4, 8, 16 Tesla K40m GPUs (with a subset of LibriSpeech samples whose audio durations are between 6.0 and 7.0 seconds). And it shows that a **near-linear** acceleration with multiple GPUs has been achieved. In the following figure, the time (in seconds) used for training is plotted on the blue bars. + +
+
+| # of GPU | Acceleration Rate |
| -------- | --------------: |
| 1 | 1.00 X |
| 2 | 1.97 X |
| 4 | 3.74 X |
| 8 | 6.21 X |
|16 | 10.70 X |
-TODO: To Be Added
+`tools/profile.sh` provides such a profiling tool.

 ## Questions and Help

diff --git a/docs/images/multi_gpu_speedup.png b/docs/images/multi_gpu_speedup.png
new file mode 100755
index 0000000000000000000000000000000000000000..57a803bac8a6c793548abf71db2899aeac829fa1
GIT binary patch
literal 156739
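The acceleration rates in the table above follow directly from the raw timings that `tools/profile.sh` prints. A minimal sketch of that bookkeeping, assuming the rate is simply the 1-GPU training time divided by the N-GPU time for the same workload (the timings below are illustrative placeholders, not measured values):

```python
# Sketch only: convert per-configuration training times into acceleration rates.
# Assumption: rate(N) = time(1 GPU) / time(N GPUs) for the same fixed workload.

def acceleration_rates(times_sec):
    """Map {num_gpus: training time in seconds} to {num_gpus: speedup vs. 1 GPU}."""
    base = times_sec[1]
    return {n: base / t for n, t in sorted(times_sec.items())}

if __name__ == "__main__":
    # Illustrative timings for one pass over the 6.0-7.0 second LibriSpeech subset.
    example = {1: 1000.0, 2: 508.0, 4: 267.0, 8: 161.0, 16: 93.5}
    for n, rate in acceleration_rates(example).items():
        print("GPU Num: %d  Speedup: %.2f X" % (n, rate))
```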
zHm=a*TcNmQ77rMQgIs_5>t8R8d`+(-T}|3gfv^1$s8XjaPA3#w80(5 z|1!90(%mY|CptW4Q4_DMRHb)?9VTZ(QQgG95@iWlrTT1L9@uYa)nMnTZ+KizUmex; zA*{_`2y~5Rw~_`xP__Y)G0zJ6#?P0eC!Rf!ETklU59bb0=0CTMow3AFTt@j%)YELDKI!r)OCnen5sSk zJ1kStp>Kpf+r0VA0^w4o(Ps;+NKS92sPh!#qB5dXz@nPF0Qm+N*}`&wUUoX|i!c82 zSZxN7(*WaL`3^l0el*m#Z=S$d1i;(FL)|Nex@W5osZO|X?`pb^Iym3gC|HKLTWYy$ z9+cQblS$s8JNtdAH+TH`>wCLKc!;T?&mwf${MHFlD#GHDat62+j1pZ7`z2l$E2Pxa zAZb9;7jPPPXJ}qYjUsz>-WoGlYR^kNV!}bY4Yj>zq8_xF0{qhYw=5w;espxl3gmmSr%eQn65f!?#V)|hi5IQh>uS~RhYSd>-Lqnn7pT;K^2SYgM?+c~woSBod0PlRu z$volRlg7F8W4)fxm2IED>>@cHG^FRN)+ESl*P-D9Q!NnJd>G*ryZTwyryD9`KsxpL zlH6mA{RTX_8W+`K>^krGQ7AVEvpmZkQL;fR@27B|ECo$>tist0Ao-fFC(Ae!qQ9Tn zw>$$E0NAqjKQ`S#9BT$%M;NqiTzz&#QU{au+IttIsjTarl&IQTO2fK=eTTGxsDJVl{7^q&p zY+3s22KEkVpaE0JE4^7D#R%CLc$O~x3JRc;&r&)$~TjHSp zC{-%n1K{M#OW>HR%`sT(x30ILPgSI~^Mb_c(lU`)l9ab*IiCOmRu?H*Y^J%uct7L7 z&6p~_TesmN9d6iZspD`5TJAtIjFZ8C`?I;`zpA1+QAuVU?IQ`{3B`PMbTXwG@X6Bb zAY%bXh{FmdxJ^u=Q8i?CVV>KY|4RwY}17^&K zXM<wAkTh{V2yQ%a!;;tCTAeljXse!0@T&vce%xAn*y!jYM z#BG);U*1CvOhawV<8NO4PjOy??aoeM!I21c+cxqt9qo~Z!1N3klBOWNlB%{pgf!wt z=iHz|DavGBZoxV>juTlnXY`-|RdwV*H}zZ~kK}o09ChNCao^OT)0F6Dd=EZngqKqR zj`pusvg>!wXlhO!NKDqW@S4sQ`%$9_j?HI@!)PR+pL7F>6jSu^UKUW~axl*K8r8!) z5>+8oQ_*uix~uZ#-e^fF$vS)*1pWo!;+Ji-FqA220UMo3rR?6eb!YZoQZ|d4ePp*m z5OKU|;P4-k#<6?Ad=C4?2V_W--AgO!F&5d=$7LCss_a?r%c3wAs!#8E&$%Y~)z7ks z_2g|d$|+A!d51s-mt}flDynJNqKJVmo0`vXe$*|^K%RRB#>p!D=D@iu$JkyBQ1Y09 z&b&e~f9l3J&iWC2{vD>TEok-GBQ^v7ogx@a#F>pI^)j`L*RDi_Z@RIv9fh`97uRKp z!dt9CE)EHf*Op)i7?`}VVv-=N&=F!^Ytwuq&UH(QUN5g00ZVwENOcEXc{-Ibj(nHB z+z#?1@;z$q>?EwOMl>p|Z!IRms|GI?{$UKq-)v*hyiFXpCG&NR_2I+$5Tp;x4wUw-mPfRoS`{2gVq4B}L_~7mikv@+C za(i~5gcIzY-5g$JQl(j+jrxS!*D|$CZQF>{C`9yXO5s`y_77}InzXYwa+M>Ts^~||fzC~QE1}@Ka!?54mpyjLg4z)(6 zz_`z+r@x8Y`nk=mf0z4J*q-0Hfm5n`EL<_z+u@08)N>t#Aft;6q>TJ5Dt|$t)(ALX zR>E0mL7HcoF3i}|FdSv%JwY|InGY_*t-Pv9@5v9Q;g2R8w% zb!JoV*}#p48;$tn*p%gYcCzFh<{I%BS2Ep_ois7-!^+~@lys={Z+nQ={)+-$G((F< zD@0Lxpv;7QoTOy42fCf45o@6UJ3ddDSejetJ%7sW-VNVkr8}_@{0i{tNz`m+0plZ( zj9II1Gt5)EnXbh}kEyQc2fu4t7ln^;S}V8AnO8u;;Y*pOBpGSCDuK*VrIV5ri5-Xy zclj2Qe8DmcIG+)1mHSVDOg(S=(V&Av0g+s3+z-33=Ap>;VAr39m#<-f;z8rz>W<_| zGYTX;_DE(2()(e$Z%b3kfVF5qq45B?>Bn)Cnna@qVDy4jwe@&Ux{EDS9e@Tk4KxIH z4EPu0u2lAL@o`GB#y9{()w|=we(Tum2`h*UW&g^&V9e354u$9INPiG3|2Vbl4?>~ z#&Gys~3d z#wdMezo`9M1lRYzj3^}b-$_1l0|4Otn!3g$Ou|Gc^mh?^6{-2lZ;pxWWfE!=+<^tw z=LWC;4=0(Yss+;Js~M|5a~Z45_?5v@%YW4eS59|xB9Sx+%7mCUcbJZK2(i*^t|JU- zrc>A`5L1X~%hqY)2FC=-#wBUD)derh1@fZnwvk1@(Y2|~&S&Z96M?dI+t%q+AVqiu zm-Mi|R=d;psGyF$_~EEvEN6XAJqK{BNCP`yry_SZg4{#!73%JB=ETJJ<-_ed zQj_27n()`zYkXc_si{%}N*QfX*TDi|aGW{+8>CKNOhjd-{9be7AX<1I7nyUOI*O12 zdGBl)IN)YL3j`I^k_E>pA&mLQv9TGd$B3Y;vtRlq3YRFeaW{vaRJcyLBMBJ&FC4dW zuWkfs2OrLS(pwNS;P~6k-QYct1vD}fpgE@aPYlNcwvE3k_}x=o4Wcs1ToIO)La(-b za7Uq5P@;z0m)k1&~)}%FNz2_Ju`PHf#BbyXuijHsIfbs66Pu_3`Am zv4!QKFT=mFFU1-izqSd2C>ISg^l}|0cFt|*Bg{kem&R2>R!1z~@Z1_p?Qq&%aoZ}6 z7iptnO}DsK5DQYi=rKjjn%M;)>xuor(qasfz~Z{pZnPKfUI zbdYmniV5$?T6yFKx*j8o*n<`6tkKDPO*UL&Es{;{BE>K+!28!*Z$Dy=pZAO1A2PbT z2{r_HKz)ieA6N9Wqtcos+K4#L*7qrhFa zvimH~tXE$cQXAyR2aSjM9A4_m!`|fgi0**6KRSP ziZopzS|4I;G2NuW zhZgIV`N7@2nayWXORvMuW|2gHP6^0T?wfgoRVO8Hb==?uM6LpYVmsCSJ~w)34hH=y zs9a^zfc-MR1e-y%bs>(~^bijP4F*q3d}wXXoQV``TyFp_T^tY!-VJLUtna)(sA_i- z)XZmjHPLJUl_k?X1r;H-hk-pECmZ+r91?;u>41#!gYbtXmuhL1c{Q2Mqr4b;cywe^ zq102_SlM0q0?U$XKyRwbZ{il$eM|LL_wah&rMOJ=&S3pMVh{2W6x|nm&DfbP{9nG5 zc})rDF3ue(9Wu)2$nMg9gnI zvM2;RB13@|s$l|?XfWP@Eg)_yfhwffLGh6ArL4&kU$}GSG>7dtw*Yu^aX;VT3{=@> zNs6>dZbb-%$D%j{Jq;m1l{zIl&L;gw2q7-aA&6|;n{h?H6c1R$L1>A%!UXi(b=`u# z5LMvvQkG3G3UrV4JoV??7@$smb2RAnIB|!A$7xu8YUsQ+Jw0O2=pkRl`Fl1hHm>5F 
z`4>;j-hwh_6AKRBOBMIrf*!08j^M6R2c;AH6YAo?yOBH9`4?^7qU;#cGjp}2mNs!L zGITdJEqHnRpV}#!K@uS7oNwMbQ{4jq7RZ6p4tpNWXFu5=`Q(oVLa}g-Io^F=6iIf9 zUYXhgt2j7*A%v%n_aHosaA0=goiA|vhE_#Q$Gr_J_}AL#JrjX z*x5immI*gtim?3o%Jk|BvN^xXI!__}mAm^Cqt7L`&sknJIR{n0UKQ(s0ImK{jB&MF z-WMmgFEiM{LqxuW+!v7&AAhLGGaDdlqv3+mJ^R^lLUh zkCQyb;JhVNu=K(}vDN7QlsRJoO-o;A=L7SR2TJ<4?g5G|DnCk0%=N>h>6Gi@e| zVjErMU-|+qse}V{(l~sUt}vlI8fYI}POY%p)vuOi*86W-!Xh`EeX@*1ZH~Y#r`64O zqQBRQK+a3$6Z50e{fS7aJySGeJ!XChESUd!KK-nc7fO<=?J3u>5cJmRMF*)Cc(*-m zl1udqLWA?SVdiUpRf&7vT3atTY8^v@M|o?{k@n4UV4clH>55vM{7t|DFjZ}ls{&`@ zeK7F)M)oI713P3CupgBa~PPNCz+h%)nWM>L>kthen1 zs-rVBg8T${mo8NdWOwd@aaJlr8tcYG%|Y9ua^Pq`bS|P?9~mue-zCei#(9ICm5iL! zyYIgd!}{RbZ##bb;eQNX$oaSPcm8nv0dD8livZ<{I&f_aqRMh^{qR}6 z{TH2LU_Zto3pbndQ)CT(Gc7|D(vbsXyp0L{+lkG0ecvhoCH`d@-kBzC`oY z=igkE7UKqqxOsyG68D%FCD}SY=Yr@mQlmn;H6ocJPvlXULHmwW2;r$-zT9f#q{QEZlj7n zySoumVbrXv^RV*(S4g#4;y&TB^m>kRP*tVsIUTYa`0K`If9{R>+~8v4DJy0SwnRF` z;(vDvm!B?LZqK5hm+0qoUp2pB91iiRjXFnd$Z~(;8fo8eA4QY1d?7%CA%v9aEjX@ny0cdQeH>}xCfBZO?LFK(%D{r_!6w(O#Nbu6!=Kw(?_eMe^}8%rg0T$U zC~JR0se6Ult<77%h(H&|m*YaDdGlwMs>S-ljT}%qBlbaTetJl{pw&H1KA|g%$8^aS zEz=ePiTtQezZ!+4J1hHQK#_)};J5v4IaT|AvG0_Ru-@iBxW3^#m^1y5VEJ9vE3r0mm<96^?39by8)+4mOY(b2cfyuRJRqiP0S(6SbY7DVwV`$5ZW}H<02Ea0gXnXHjT~ zV?zi85F6$E|Cg?JjcfYK+kW>J-yjP#S_sjh< zA71f4tp8fq@4Bw9`&Qi!yeRGyZqcdHA%+Kd=I~{~2z@PQVQA5%c;8U@>xF#7{bI|Hr>G?#qM7iPumXAJ8KLlA2fnoB zw)clWSTG`_(V~yFaE}RjasV{S zF%S;raW3x(>9gG?`3;(N-JMOIw}3XJJiB;GxT#_x*E)q8Ye2xPFgEDSoJq*=NYwt+H%Zd=GwePbxRk2s&=ewvKab;Wg+v^ zpCU7jV_nC}vJ-kwl$Exd`azxZVo9ZUh)0@JtWNJ78-DoQFF;zIqJMDvk3EOt63n0N zqHDH=M}0rHmDa1%&LOjAz5_7rloDtLXsdau={?8-9w5=N$EkVT<;IVoQ13Zqskpm- z@5vf=U|4z~F*?B<*{G>g9y9169?wNwFmWw=|sS6I%oLv7>oReT6 zp@iHDbB9Y|BCghUHu9U-hbhN7rk17v!ktRIWuIpEa~XU`8#*M(GK^lO@a_M73h~2b zRdsE)Rh^hP$y;tj?hbfh6c{D0DQTKLaU6$>uI|$dkZhgkVeHn&Df)>PayAMx^g+0v z>QvyRL}*jd7b

@z{D+tM{LISAKS|P(*oGdOJMB%if~6)ivQ^ zmbUjCk9uJ1$jj@e4(#c!p4&2ezi+JuQiR6ToqpHX#dwnFt6s$6$w14JWX167__n1^Xh}S7|2-L_Rcq0xcdoI{d?xg+YMqf<5^Wv~ zAn4J2in|!U>O4n4S;m=~OHuRDDDJzRh+Y+j;5}1bvi4&2{ju*jD4y}xclB2#RJ*k{`CBnep=@>W>oIEXryIAe;S(WOR4t# z&$1nKb}OJB`&_R3hR{Nu_mT?j)@|yJYT(L@(2Ng#m*AW(Hn@%}X#33T2`M;s&(9?y z!2UibUk=c-T~G zw#|Mm7N&0NBYCCoFtFUYK?B9D95mue&`|wPQH;3rCHx-#aO<;7Y>DqcrsjTM2z7_7 z3Qv^V>)riSD(#!>3;K>-T;1d)cb-B&S7?mVXGZ&eLgJDC@F8jlm?mmbT-su=;YUm% zwRy8GYBtA&657q`^*Xq4sNYT$nIeGU4Z&v{X%=fjjFjz~l-IKNyu*Vl+2vd3!S#&~ z8GS0dd|TZi>W-1pzZ*C917XK~tho^!pON(`kqY>Kjw>ig0)P_Boo(7zW16XcY@-V5 zw#PYg9r$Ebpnc41*C3s6^Z(XZra6!!yCN*waG7B*TEjFE;Y}q&g~{6qtn`zCl3de1 zi^s)w!MJv^c7;fmgGQS(M9v;+>k0O`@-+ywy@7$epj_)Q-JbZEm~VDdCbo(^8D0Sn z!m-ZPyDGw+k|K9q_V7X})^40vMorycRU5Z0^DW32H5jiRP zxlf2rcz9uq0j71A`eept#w{WH@-Tc&kuV2M367Uv+}l=`U1uW7z1D{i+Lj4t2Rbsk^i0$rH|D2`~qQ1IeU_d z9jqm-*thI4@B+aoZmBuqdwm>y7rda7qg$+ebcFjtc0HVkU~r-wi{;~h;)Z4;!p ziJ1`EG#dk_c4!i+pqq`U^)%3HDZZ!m4q^S=!zBBM8nq83B!7$=H7At}Jwng-V|Ro$ zGaz_NW1K1XP9K{idziaUO2ess?rZ4>zY0)!s?pA^*l}KbRCuc*!GDPG;K!QsrD~yr z{NjhV+s(CyYD6sB-gg`ww;mbKWvwlab3IE_>@sA&Da2;M_ehCl1&NT>PO@^R@zspu zn7u-LaCBYgR^J$jTTS(iRJmTzvx!0WmzyT{L05)XC->-+wd@Wwbsy~e0Oz`lPnO@; zmdsY7mM)goJ1HK1L#UE}`eaE>3a%kMD)p;70y}1h;8&=QehoQ7b@QmWk*ngLtG#UR z#i5hb#t^Q)tk|1D?l?@)y`9+g@>K(>AS}ra46>LI4)7+^be7Vq)zR0o@3~b9HX1$t(#~3q9>w zZ)%9B{~s=3TY&TK@d*=Lx`EcAsWEuJE$ZKat&SK^SI{G_T;jIVYJ1+x*i~@ zF!DP2f!2IaFQ!h#bywFO3FkZKGm~W>4FwEo;x+TCeffl{l^;Nr(}-0*WDdk4c$6}=YIJ>x%FqYs_GI#D5+!c-ew?XE8{A%!jKfo_dt>(*qUdiPS_`+MnT$2*e8O2*N9BfhOJ4B4|NzP@! z$q@kaaWxUZkV-+Bz=o`^L)jG4%WS-*2`PUah*lb)ejSi=eGfjYaH>~=-@KS26QC?*)2$K#f)AhY) zdRrIMCaoQ_)=7<3e9!a5H?fnvC~UzF6Q5M81cZ!!fM9 z{baq@_P~~5`e*ift@(x4B6;C@GYc$}EOmAz&>z;n+Od6`wmal0?kBUAirrN^%a?!D4=5VTy~qvo+!mS_(%GB!VeU{(pdl9Vzzz~~u3o4#in&3KVIgfiR} z-i`P;q>~EFQNsAR;j+$5vJ(NgT(onDxPt%|-TZpgxiP+{;}$K&_Xef>>^KP;7%a#v zHN|k(-nLA#1-dbtLLh24vz zUA*x0B!cNC#ru*%UlUpVt@ruV%c`>3**Hhm#O)0J$*ocH@V+{9%c~L;rbVW$fY(^OeZ? 
z>d1`4xW5Qpq6sW5tf4uOLmHaviKfKx%hS9w1L-$-m-bK-E zrRwGer>Sxr%OE^KT6BxMOnP0C+C4vF7Zg`+oG!4dQOY2$`6J8JRI2I9sJ^athy z&mZl{O%pnG*4_RyHu(@9^%nM){HjFE8#-Jwq4sM~C=4?n2}$sIt@q#L2a|oIuMyE& zgd1_TckzWE{`o*6xh$tL5wkX2Kz^0j6P004$X(W%2&+8}vL`O4G$^+`Kwt$0So-gi z>8bfq+@%(M1A>8JNwC5c1yLz16CB1hb16KSRJ}X|vS)@LgSV0G-n5{C%1XBR@^Cx| z?(yO(-w>{LOwzE9b@QpfZBR8X))Qi*b;NrmmBNsOmAoeWR|i;q`;fFAmu_lb2@ftV zBd6G(M2YN&wkpf)8v@2Q_BeDyH_uqWcJ3T0Y-k$x6l?Mi$Lr079ShZ56dg#R%YP3; zG*b_RM{*jERN^U$SULwRl4r6@s7m)>zQ5@-RBeP!-!~A$le$u>^gn66|2+5I`;}r* ztdg_BNR4*iWZgnGgG!D-(??;Q6xWqAq9UYg^@#=}Y+?$X@OR2S^KVUzEm}6SZCHfy ztbHmBy%tN*d8=;5dNiGu#hmqG7523@6gWViV$hI(Ww5hiG@r6(;;K~+l z6yEkr4lq4brM$sL#PWk>#O9R5Fe<9TDcPNRGZTd50VtyLOR5M=jDCgm!JqwF(i&|v zrGlT^$@=!mwjflXhU219RtW0wui~) zY-4q@@+eR&;KvAymz~z>>SDm_Ye~f{!5E&3xDG9%lBAiYOUY$)1=a3V)7L$253>#E z;fLD-AQBZkIRTwhvU_pTjm6v-H6q2a_zAJGI@)B2OS<`*XUCK)E$_D(QN6Nn6WcP9 zoV#~&)(Oo7*L(kU|L(_>^<9_z@u0~}I)YPMRwlCO6SITbodFY^(aLu;`p;(l-v~Vn z-1;}rFZD7p3HMJ1p4Z?uoe`5?`zAagG7ypWk?_?z1NvLSP%AR{525f^DGbSuqIasuE%$njb-DF_9EQ!laR3chylA|qg z_>1r7E=s`Ag9z%?Ygzw>w5y6MYA<)qRWjHx?v;I?81|sQrL0`3GtFHs&aIeV{1mj9 zk@G>*a^`x1ja~|yn~}G^me9;1&r-nmM{KhzH@+j#c4`5HPYDZ4Y1*?@aFhHai-_^i1Zy$N5JoP*RYCqeBnxXzj4wxq&|STZ&=D*doKmV+#pdRo zIPNFXEoU3uB<7h%+i=4;fAfccq5$Cn2x(2}yb#zx=8V(8+?a1EP-}7|=XLF_H;2!5 z?xxu{P_#y2ACnwOh6YK{=W5(iJVGV=K+lgkvB&Z^@w8HVzN?`g<~NpL#0dql5wy;Q zOA(~YWq>Uxgelpub>PQ>H;pw*R=ZYiMOZ2_{h9+V%gE;>t0rEq>C;NJV;BmFvCl&j0)hH?v9+*&YMBN04dka+8GnXuyitowP?y zePl&mP|IoY&`ZLg#pY(qg{ii8E$*Teh9{w4M~)P7RoKYV@OSdh1M7h~8&OqToSaZk z9-d-KN1)wWukM9CqD=n;=Nf<9sKCH&)73L8#h*|!$}J7$YrohEJ*U4-s49Y3u{fV@ z?3ndSTp3hRd=Fi>wD??Ct*guP=7<*B{2zl)WSr^NR>v-UhQ#`cZPA?~?LXT>C4ybI zWNbr_ucV0Q;}&q)%7kCpb$oU{TlT}{S5)0#pXz!IH2Ba2!x3-eUk&UFj9|n<`=io} z`kFQgtA;@={|)31S?wAo8yQ9*WDl4xnVgegsagJK2OBDsS(aVKC!@5~dVHITxYL2) zklD+50M4jzCu+!gs##moO$F+!@AU5^fBNL5;%d-aXCv^4dVOB4&ovqFj*w=*B!!Zl zHs646Bg|*B3w<7}Pw#Wn9|<2IOs@Y-lkeRmJ8XqMrO%m3c7^%o$UM)kHA0o{&{`pE zfNO*N&HjiS8iMXVyTKqk89mEm!^mo0|8Cq8K^&LcT$?PneEkF4{sC0axD@qqlG3zA z3k^&BJ-4-0x5Ank@>_nFBSqLu@V*wm5MQZi4w``p=HD-wGPL2Lzk5D4q-gCO7R35j z&_Ut?(Xz{BkBm(*#Pi8pXROgHSu5fof7^0fH@Rt$2LcrFcZrPn^m^Flk{lc@H6L{H z3d{A7?BDlst?uyTXQ@T(9NMcX5hSk`!=8&T4VPR&nO2UuQ|_I6llZph$$)Fe~`NjR{zp z4Cuga9U2oJwrv1q6i!DORjo~O=A!Fuzl8JXc3Z=vK2Wg=1^f2=@N>M|j78@sC&)Y( zXB0q(+DK4-8AzNQ=de zH91o!B|~^JJkOE*LLQuCSNdB~3^N zs`XOTXKcOg+8OTgDygFrl@1+Uu)I?=kA@?qiNnJDyCI&_<$uO)qxpy2*jt7$7SsXm zG2_~^b@_e5TS8$kPDP8Xh&4K>5mEBr5(e6wL-Z$z4)O9*M%UpqC0l2zbiMc6{(ChQ zpWeyRSgFb5WGBjJFopfS*H-$iGWzh2Hn{o#hir$u>K z5_I58mJ}pspf}Wof)(|*3hV|NeSt@p#a*VvvaU6_hvPgq+y|S_c-lPX=dQ3-Vu~sd zW7}VuC|@#cn7y7ZU8yoK-1qzP@~%OZqFKQV>L;K)C%ptrQ^dINPB|IH*rbxxF*|AjhGMSh5ZmDWzr?n8wBFN*VCRb}&uvUTz(iQu9DvP`%BL)~3|xtvH(kW$Sv_2thu zW4Nh@iOF8gME7pZbi)cLO!F-mSmr#^)rKn94V#7fG!EhuDSGxq=R+YUR>L%B)fi6` zW)kf{V7QPs!AIH$ZyezRqxk!Sq7!cO!|tmW+FK5~JJ|8p;HEKynJ zdZC{q4kIi#c;Uj3Y(s3Lu)AF(VrIT^EZrD4GvC>QCt*KF7Ra;2KO%EqpiQ7zIz^Zh zDM?6-Y_n5_@r65cLgCvZ?pkbqpEvI)KbcLuvI!L*2g7#O9KFo#AjFU>gKb9=wI9O| zV0F|9C!KHe=LO!?A0g;Mx}appgF~1p#4Z!cLooG-y;B3!oziMTaY{ICcdPzpezI1@ zO)b?uyUgE{?r-kq`ZSm6J%gd@xSOk96B+ zl52#RndO56UtsR263W6{1esryF^+uHTNIv@kJETZVbGJb(#zO88)vM6%>Fn`PwN=_OXw;L9 z*}-Ht&vh%ekzc4knU%YfOJWHuEis5NAX`4VDmGaLudW+%rOj+M1~knZa~fM3LRG{U`?iPL zsHTI}gR!UhRlNX7aFE{N(GAw=^2pHbtHvs?%!FS;Ok$pAfQnruCA*FoY1bxHx$Y14 zSrohv$c`8NFtTg3s=rF5-5H;LlaVhkA4vqN);O5tI?p9%R}mYO2BSKsT+`Qb{#uit zR3V1via;nuy0||ojgSlK?o+V7F`mOVBu_8!Up;&d+NO8w0>1y?`qg6zB^8xZwi!cC zQDq1U7TkCzHcG)NXYy2!Hu=6)r; zBoD?j__HW#QJT<51&0_~tMu?zW~g#!Sr=nJZ$G@?;8axN<^o>vJSb+M#oiU!DW1q< z!m5etMlBC^_X_~4+Jgk4`pEC|feFM=a;MP}PjvH1^*Yw!vm!r&?f1gg#CL@IeAVsL 
zu1<&+`L21xdGUluzKhyiA0%w%qItMCk;w1!>3HqFGJEuD*$E4EFsy#_bEY2ML$)$# z2-xT`SoPA1?7H~$HT+SlF1tM5wz8=S=6Eqdciz$_s1RIZo{!*O!+#RWA?u=nH}r;_ zIM&ftpw7*7Nvqvb^y9fRf*{ndl1cB5t(rrndYiy#HUvD8CH8wSag5I z*nzL*l@`WYyKg$~XR6&B19Z^LYr?9T3duaB29;D(Q&l&dogA2OdG!sN?A%Aj5MM@u zl5$)gCB@}X4ouqH;8|-eWk9ccwsUf_`^os;aQY&ruplqawsvX90eFJp#X@OzWvKhQ z>y3B*=(Fl)oJZAtib_dyfPL5{c7f&K#rCVB1^DM1)@w$;FX`;ms)lEWvCtn~5Af63`n%wi;s9M=jA*Ds~xn}uuFfLkF%je*E(bPXN$eiUOcq&PmDSZbOPb!+wu5M5IB zSj(4_apm9u!!tGpEu*C!EM5bkE@9sAk7vkVhjj%d5^g6pIoh9#+!0vM@N;Ca`>VpI z4Nu0i1jM6h@1~ZlPrFmRy2&6cef(klk+1}_?6{~zRnwTX>u>h@QPaf4U^ROWt*H3o zTM8@NIvf*3#9T7xaeLTsa1*{;vQeKUs}?t^wWq-Iz?;6&k zzb%9wKxhUBUOe@XVEu=?ygw(o&*-7Jh$Lt)G>+v#6RuzArAJjKU?>s?Ipm}1< z0Xdh6;?b0+14QC$t7S=(_;<#y~#DwStf`WCkirl0Tk{WZHp#`NVw2GRrj+G>-1CvE*+s4S^`k%3aY)DYpg`r8eupo}EJg2n@dV z2J&uTLFwUh+47Axah&%zpYm3}@j44qbmoYX>}Umo5}OEP-VsK33t0jkCo=8=w?s@F!5 zT0~^fY@oo?f9ghln_3gS3mh1qb0!tKf58IkxGWqZk(M|Zy9rW;K& zheXEeqlEMM98#Ci(_m8!l?GL;wH(7WZd$D!Kz{OV7e6cFhfqIdqh=d(b0kh*$ysj* zecJ7QOUtQ}ct0Y$Hm}tuL$38qyQlCEjsu@~{ejog2wRo5&p1)U{IO$Y2`OcIJofGB z$oHD*Hy>#)_XcW7CGXgyQrr==&RnbLQ7(U&Yq}?5#EK@~!+$sY_5hv42BQ)sbu32O zHdT?um8cbP zE3N9xvB9XVk5|PLC>)NQma4`b=)esYnCDlS$*_6z6MU3wMEszdLaqNlN@zuvcHwPj z58O6&xx=8&HdA)DN=!&&6G7G8N?7nGG{_8`GHBIuj50YFWVmO4z!+d?CyM1$nHkR( z8>>;ovSsXnh*01PMJ`Z|H=9;#OP@lY=a-MwV5JLTzdP{1o_}0-YX8VADsAV-1qk3; z?0&Ln7(F~acwuwyx{aX?mS%3}TX+EwuaDON`R2T?$yIL}VZmp1W}qEyixQ|lGbM#2 z|9y;5>K!ByF<5S__tqM;d_KrHPXesy$OC1*&Rd{Jf<1s%1Fnl&a z+`DBNOS&^HvA|YJNOs{-Y=L6DxCw>b%Y}|>(@POJuk4#d`L1o+i8V=%GE$%DBA}Lb zm+Be<*KG&fl-!A-Y}1?6If%cdZ5I+E~n7O zNB-1j7^5|w(8i%bRKGoo3(g_CljIj)G;a2DSb6;m98?Gv(Jq}Yt8YGWM2Of3B^zO| zDt~5_$np7w6T4*n!~0n^rK=C0CnhQ1RKs#}r^3@<$$;4yT~!f(GMs=XeszNI!iZ`v zisQAPWkL9(41A0bkydt)o>SD4_1q%+lIZYzIe7Q0Aof!iw{NTBc-%2>jsfg#F^nfU z#mVfZ{u#0NM)|-oZ__=er@&*Y55UeA*i72fGJ*VJ=c5bduOa_1>*VSl_h6|ILIbGt)em=|PugBUWj)7O^@69t&Q81GpteqoI`F zs_w5)(rH4Y>{dT2Oj=oaCl0%k=(0zoxK_b|u@DI`S8b`x?nvN@d=>egtsY=SJLB+@ z`OgC(f|)$TSE06o8pNqwWG8pY5icS|5fh#{Q12=&vLdVhTZ8x9KK`pF3G}^%;QcIiYA|;_muRM(@EIjX;}5^({~Uj z+3a-}Z_@HU>(zxudKmxKeT&{4PLC!5b9jP2?xdAuw{A3l)e-kd=F1LjPou#qBi~C z)Ee>+ea#muBkMX&VQEeZ+g^(t2tk#w$N6^!#8u;aNZd>>+o5Ls3<<03ny*V3&R}$TJh~>IoNW%u*xn}Wh>~CL zDao9$^uQl6+zZUEjp;VeU^@CJ*O}3~$R%y*b==<`f0m1}49_~IZ!;YgT4Ph#JS3sv z_=MRRta|*J)~IhB(7M%L;Jb8WiL9*~y7(xoI@Ge!)>e>lq>fgOFX@f98~e7YfbDY6 zJ^}pMIb8?Lv?bjB!}ZDi4tGk4cN+L;cf8mZHm39Z^XT1H#Ej)~+&gC=#=o&0VS=dg z(ZT<`w_s;to{_SXG}rC3RIBTd5jSmrFbBHAY`74V>tYR|fo@}a=Qr&C#XeLfGmwsY zzp&V_R0N|N%!T3{AX{K+dM~yEUxv8~&UfB%xmimnttUP^2Vu$DLup>wjYpmot2vKK zZJFdbhd`{ir<>c_6<)8k_`dc|9l%X^%u|ap=|jP+p;FUL?LE<51^p4AQVXdnQZFSO ze0>(g=G_*-uELU)ryhWg|BH*FhpM{a{Z&o}Pe1+yC43j!ceJm(GZx;-0cHL;c(!z4 z(n}kjY{yCyJwP&r?J#XC4+$YJ(b`u_EAI9M7M5j$ZQf*+9Lj^unw)=Fo=hQJ?oX`J z5g69tH%p%0d(J{K^w*vY>VCrfpPN6Ss-ASCg`VH^7jZt{bw9uE)G9DNScZ7TQfx%X zgSzB`%`HO3u=cz*yzd#Zh0tROE6`Dw`JzldgUB)_`EDSaHRK^vA6&j@wuvvxI|Otx z^fP}OK>@+>%?#}jH@3tL9I=o;OfMt#A2`Bf;pak|O8sZD8KRO$5S%^|@ zViBq|oKzNOfGLSZzoS>Rq7Bkdx4$G`1x)qa0@SK7tPJ9;Jmga9)tFHGR2iJZ+|5+PopGGb@cAbHk^U+O}W#(HHyh|nDHGjskw5unX z-gx76lWt{M8ah!fbF<7q&Oe%qKhyG5BTr@vBzoGe}rg{eKEBNnW zY6w@6A+Yit2nL%wU=jC(g-LR~S2q^KyuQ(mbG$Wc-O|RgVO!No>8#ukGE*htkPlV4 zX2<-_HmN+T<_{(b3yfRJp9-bzcM2lizxQ5L=_4u$=jCIjhWc^zF8t2PFoCD@R8lPp zs;))W9C=649Yr|Jx%Yy*c3oqrd6MR~vj}2xS2VDodL}dOe%VEg2VW^f6nDg)+ZhsF zWG$TmbIy~B^M;b|LH^SBUymc>(<(R;bp3?WO2KAVrW@}QZ7YfNZE;d$i zj<1`cPiUP|j+i- z?{M?g@Kc*Zuhs4iOaDq;)fLx*f2CffSnJp0j#j9~I6;aY*7z&e8+lMVw%5K9yN9os z=zW#5_OoTls{?O@ZMZx_tEO&DQ@_=05e|`u1ar$*87}%!3MhVykvh4Is|?ESkCPAy zGh=S&E7@0cH@xw>BAkn)vCl$% 
zQvXtITv-@#btOON<=FnU?vC#7IDUb|c-Z%%VOcoj40b5E{7RoZ$5M}&g_bey^{3>P zMr3OP?1;u;pjmO_ZDjJ`a+UL5s=gp(d&2~B>cv)kzXWKoeHaYm*&16br$3Trkv~H1tmms)l=ie zGP;V8iHuBg%}0kbpnVtEs5F87o@aI0pIZ%6Sbp7ZjHY0Sl^Hpi#V^VlxT=3amuRxT zWQTD@*3-!wOjk0m2t#dZg%?Ba=+$y{su34(()C6QyH8{Zt?u$zPU0S7$>1bNz36k{ z5#G_N&RhJn&>Vlqxs9sjX_zT(JL}08B?!=0x;XkY=GOsCan5{8q;0BxpEO(X3ZqND zx%v4HyZqmYpA!4WhW@$30HQkfEJrf;NcHomq9VB52C03Q%<`YakY4x~9`3H*r0t<= zVdl6nH;~PWn{-p{c7^)<;n}{mxvaPLLiRc1uh#(VRZ6&bl$BF$yKx^fwGyUjwH)ne z(r+Ew#$>u|mkP-G;nwW%`W_;&M>m!*ILSSN*$f!n&-J}yv`Q7SL(4yhJbu)U4fT_b z*k=Nij#6L)Eq*?LtbK+PhVI4UvaYo$=<5ua+7vryTrT4KMlI-1gt^Vvqc1&o0b_cr}#{9S)^ z|M0$chR{|xiAhx(}GtbNf7*AgdjOc<5ME@QX_9US=J z588wSmy&wbSKHtzfIEnJ3mR1FW)nAe{$rQ6S^`llU)~=Ab=Cc-@|-ZSSaUYP(2ON6njQo zIAAOVKH3&M>Hf2K?GHJ9rj&4Q+004@Qu%_ICBbn_ zv!7Bczcn#fqu;K3yBSE!Ei4#XezA5Wv9SU1Qz1gIfCmHLlp2y}tvZDix|67tFL%*t zCwxzkoQiR}2LCsUcnOi=U0C3j&JX;5T>!Bs8o|eH1C$+Mi^{0<;O%6>Sn29)e0U7m zl9INwkLQ%kEd)T*>vg5k$I^`wQF|LKmsPc1UK-g0=nXOO40+(+FpsWPJ@rJd1e1&J zi)X6k8wB5}lMCq%Q?2(_=H5&E_(j=GL~pt=>j?rhZX99&YMEb90Alfif0kdR>J$}N}jdNxv zO_3Td#QiR$;Hma{B`Br=W`Pt@5g|T2N_7rFpTIVqCboH^fw_607TVptn{$HuqD&h2 zw?uY0&1RS^dE8=@*c9)Za+n}AvkRS1x7taR2AjB^(+VML|l3ip6jab)L)mO zpJ#wBPFCS*lYbpSzF2%m-!FWeA-vo_M!6bW&1YCvfD+r+X^cb3xn?T52Y3MqHZW~^ zvt#e2_j(#MhMhs4>#+gNe%!+CYRz@p5k9pu$#YAAm?&Nz_U$A24)AM4D>d&~PiSZ5-LZzO-4Yr0JX=(=TfHsr( z384}c3Jfzzah0k4t-4H#5gIQKNrdF51KMV`j7;A@gAHZj4+8faQO__wDcYzg8L>}L z!>r*Y>=#zA1c%H#IN(*9d+dA*E9Lb+FkKkqVp>b9X>R6B&m&`_5ZW95tM@4bqZ(`! z=+TL6A`~~)q19;4C1ZFXKw~=N*QWovWniLA-;P&U0HWf7Y=UP67mIzrfZQcyXo+Cc z;OPVes`z?MK!zIW+BkQ#2Fb51o3uP<^o^Rn>wZL15M4$Zf_$!bxqPn|~pnQuQnQ>l2#5QXyp~RAhMe2=VJn z7hBz85b_;+h>P!mZ#4QhxrWhWUE%XT+yAFSdpwWmYLpAPX-BM%9*qrVBeVLTJAx>!s(`iHqNv7jqQ@7qnXyk7G(khX-QTMk}#EjC<+8(8zeB{Zq@pro9|As%i**>poYY4xG1B?^y^Qu-@>-2 zw&Z@&$&DWQ695&rbJXp80POBN`eBzGB(ejJwP)j{s=!vNDxxtBG2s=GpKG8EUs&QPa?#hWMuL~ zNBSF_jRX5&aQ|lN&Aqt68RV)7bgng)47|cw-G^s%r6@3WA1{{%{S8Ct?bRFYS(~ zLJWiQ%*fFA?%i+Nd>>Y2lk)}t&?D1cBZLTHVxCYuPHQB&Y>Ad&^0P?*bXufk+l-Ft zxLOkgDmN*st}75YLH|O3n;eQin2_ZD)dXs1F5ufHQY*ncGG~%eNLCKR#Fp&BYyVGC zD8+FFbc5g87-a`;MWQ$ix3GjC_>7o*iDGBj&tl zpG&DxieGI)=^vb;wK5%qu!wi5hPF8ry_#SW5RxgqvIQ+AgNg1Bo9SV&$IP?Lnajk; z;HMr&Gc)Q4!Ic9$b;2Czjm}SUH7P~1__5M{4FtccZ;QL^iHO2gWJR(9Mu7Jux|Dk+ zOc+lyWo3AObN)9oQFs49STz&soAP%^%o^xeh2&1tN4%D9Gu&#{zw#T)CjTJ;P8J4O z)llLbzs&cOj2APwm{A}MdFt_c?rzc=8PJW++-3i4yP-as*wnee6}rb_x6cOZsIgYmy*9(3ZGn~h#$^e zA5T}`<;V@euv*(%zz0G9+E&N?Z;xTNGpc5dgB#YRVM2kOJ^{JxNHkuYm?&EtAuli+ zG^-73aEsd|0fuZnUDXC}cxJkl6-1V!V*E7@^~0FenXF&-p)ISV*S=-%au>tS-M2gu zRp$DSFcfCk3CiwzfJrBJH9>6W<#QDNt&$B+vK*ToN<&6r;=Bt6ASOr~Q=vf#&#_o6 z$>e70XLn|8V#$E?N&Mrij-%=OV#e^vfow{+0EjAfw?RyH9UeQOQOD)_2=MPc#630 zNULUdWCwotGDQ`#F#RJ@=al4rQb&9y{W0#PNV+95A6^+6oM6spGDPB;Q`qo=Hs^d( znV!*S>3LMf=Q=u(Mj;neJv;_w{i^vlA6^s1X28QCS&J4|%#g}x;w{d*+|2yLBJqb! 
z$L%UU38z~#;Jy_Z$CR3gZ_$mI}oAS zt(q=|r`To8DEWkk>U&(_(V$BY=d4m|+j3FHN=@@9$a?gitQAp13u7Yw*AMTt#WW2x z2$!--BTWD3O5v#48u_~U?u`x)ebASii*pv&oR*2^DgqpCc(m`8tj;WsE?* zHeXLH*LN{v#28(vf8o#ceNi;ob|q&W(P7*sC@P&X*T)gNiY4D6I@)S<2i>84QWl9o zTYw!8DJV@i7Dj@jZiX^y08xEz7a>$iqgjR@KyiZALX8(Zy|!45w4^~X<(a<}rnw=E ztGpYGE@`JP7?y8WD`U+WntRQ{KG{j?|4q}o$0dFD|Np?#vb%^DAXmo!_?k$iCm8}-g=a%ST^nThI@#*Clzq4OpT6X0A;@jKe zd<2~5)<6N7S~FN%zkwD7bei3R*0h4G!Be_s*1A{uTKlUB$lB%ZrAEDK1{wrxvX!}- zPhsxtJE_V(HUG@ZJJLiU9@J)XZHT8ut#>0|?N!QpXHDZHD669_A%xUh+ zb7~SGM~haVvpa1o(X8cOXP}62ox=76ELT!7ooAS?B0@!{pl~TqUNc|HJy!#QOHpil z(>1w4#bmfF&6930Oc<;pPbkb*0Z&DGs;=wOIs#2!5+7$K@jO**7T4XSrLT5J@H~U_ z!U-JLWf7n$8CrTHoFCPkjIOMs>@{yuWnoGVKIf(nN-|gU^cr;M7Js4NF+6zXbm;$x zkQA?~WA&!#XPmdihQllfPlt}!g0_(t<869G|3xFH*!iw>b1lRMO1IE&Q?$4^S!Ua) zhCB9M*u(I*?H^fFlw^Fw7kG@0fQ8gKOA2cFjz?~^0S3z(+aFS50c7v}Em^aVx68D>ZrG z4!1AWA7Vobsh1}0kP#x?dxTz9ZPIy=y@$Xet)EGy*_KlSNo1L`S~ z`^p^dHzgl5I;@Ye?n363^t_>sY~3d4);+0fTqdjn?!oG#tgV4XfydfMQK7d>NMp^B z=dJw>>o3nl;3U@$yqIe2{U8Xte93hQN^Q)_3fx5_onN*B9d%Y|V4QE@h+26aU9(0W z;nGOkoW-kjTM3u8IvIpWu^uAsM;IZ{lb$CDJrYbny6-s(fEiws|H;*|15BH$Hhc9; z1Afyl`}{d=&42s%=Up!iAC3}uKduiq;&~EyIg;e`zNY#7i=o|kO8Lv;l-+_-Al*Ec z@er&7a6h#VM$y9M7{B3A4oa_h7nHO|(b5j72)Y~`ZOjIh;45hiJJy|mh?`+u4;^kSa3LdV0r(c5sG-TDT@@Jo92KgBS`a32?R0Jpj^>l35 zsC)Tx#euk{(33Q!fgJUBrFi0-)1}NP-Bz?RL-4xm*I~qCHsowd@NZ7cX1iahDVm(M zUxGop` zAH8Kbytf?Q%fkp>FlVFte-?%7i7*bK(b%7Y*-kE;fElxJI_Aohezuy{e@|qN!Sm8R z431lnW8;|$)v9|9Ik*CRreV~_o_15@$xGeKKIc0z##qS`NAVUB48e6K7hcLyrMRXS zOa)n0e@B6~k(U&#W zW+V+=t8VCMDs?Y&L{1796dY{}JjhL7&5P7H&*2_X6W9L0K=-QfLJxoFA{Z`sbta}N(^ zG*m!8c;6HutR7Wv;FKF7Q`@tc-AaWZpnEAH2)>P~DjqFNh}3|_57|oB8#j7m1yuJQ z17+c57S@bZ2Mo^*y?(wq9G6=dWV{&yN~aG*#KOxpR5$t1sy@wSx~(gbW4v3@xNids z(b7#NcjA4ZIP4LHGcIkswj)wodq_d5>G7cq-Zt#?UBN!1jijE}1W?Dpn+Q&*jB7ja zFKpz^Wak1Un_9D04SzK@_r*oj>sQxzzO@l*k9Eij%+{p?im26(;Beu|oHl|Qj`mOZD zsOFQJp7BYg1nh_gHLct9NMyIk3z*AoCpMV}-BaLzdqH!4!H~vvwk9wDC|rFiXKaXK zU)5^j`Eb-vI>G0GiPx={j&3S3lx!tCvjce_vfC)$<{7r~0b}?O{|;bvE3IqRKg+a- z+O9pzoU9&?aTlN$h?TSL@ZXxt!B{l)hTqD30t?kLf1xmyTmG&zr0KIH;{QSNxGkj; zS?8R=Z{$$N_2t^bcx3B9^iTEcgJWQ-jWvfnf5f(=yV0lKqQAXvcV^MO$$b9{1)fEk zo7S-IE7TjYH%Qh8pYT55lBRx!ciG+P z&(zc{bp6;0n0`DWW3Mh=sYnl))hDiDeYvTANDN9;C$>3t9Jce(D_1?iy7a;xINR1F z$j}VhXkNY7?$2<(fj`q_*QtB2{wYk7#?SD`Q>GqP7if5K?alfXNaQuErQUgZm0q~; zGP!6h!D~;atlZgDR8%P4!7rmck{U#=YVXQ!jUK=a8j}@m6ivaPMFS8B`%`W|)qMH` z@!Ny!7P%Tu-R7P9M0e5E)%3M|fVSR6Q+xfp{KX+5-NCmKvx{#!cdse9lMzFrW2#2m zHmdhEom=+)3kX+~BuB)Vamnf4+q52Dor2~KS;=>z-*@>;r5b)Zw5bPFfz$}IL~Q^G zH)OZ0B+PqXN5m2TO7L4YEiy#IiK?gxmo_XxBr0*9 zAK%j}IjJ3PGIk3#E;O;wmH|$>2Yw?h4ML7-k`^U>^JPSq_1xt|Cr{XaHJB=8q1t-N zy-uFjLuBV`Rp-6J1z|`WHKQVmZht;&bAs;a-kGL^j z5k~_IQ7Ys4?pKrOCteJs-_zlQ%*z9xm4&+RJt@OgCY~;Qcv8yGGlGZ4>%n=oUhrhm ztR}5O9?ti56j3_zZi<@v)6Z@>`uny2`209cKKQz9=bMq||Mk)B!+yi--|Y7OKLK~& z{G!(PtseseemioZ`OVkgyYl*W#QE1N_8dQx-1xrdtN!!nst1k-xAZ3Ic(xbjXenQg z!m>zGq%Atr_?hn3>1*~bDC+nrrz+W|Jq>gI?@`swPH36)3EA&=@>&l}XdlFa!GkLp z@^n5)v_TDv4nu8En-T5BR50`m_EON|-}LBlbsN9G)>iB8>#`4n5zwa>cp_0cU1H3bxC%I-v4CM`QK)BK2nX7f`+~{p?ED z#?JC<GPt%v` z?h$X6EXJD)59Hme)OoxY@sq8e^^BJfWT}7!j4I!tyCw?PELpaQZ`%#a-Svr!X~&h$ zv8Cfia!3JkONz345i~6iZp*8Ga}PhoKcB0km4w%tYSwRquWOe1e4_5;;Awm()6WC9ocyHjj@7NBfTzg zqzG_TpET-)i+UO6m@QS7UMe+?kM)HDQXM2kIZs7f+W&~A11}?`kdHK`kGU9=JEXw- zhiu@i#t?H-Lo?8LzNiQ-eG6c}^TlS({NsoiA8}FyHcWMcI=4Q8(2SGL!WT~m{($A+ z0B8j7&<0`U#j}-?E@UH@;>jd#&2S_8#e-Qig)s*uv{Y5`9?ScY{rS?&`Kqf>03#qx zos^V&8z2k_zfbbyh@?0>4h#*voH=Ehy>9Fc>9V?U_TEUuc8!Q@~&KIX-p{jvF#J4 z$+)4q9(qL!&@=jo6ZKg(2=K zHCg&t*Mr7t%vmC4xhOo9Cqc|d2DeAKz|crqoVgtYI`TsY&%}yHV|Y=8*2Vg^(;15; 
zcb?uuMH#P1gxXh`*heE7%)@w-1f9%nFaopHXc(hea!zhBGU&$JRjX99t>v!CNqV$1 zbz#SZmXB%*6nZ6EcFwhmcl+fSO6j(in~w-|&|jk8NVJ3|^JkVG?!;-EYJ$I#buwkI z*XtRnW^>%9p6-yhW7ijKe*7$Qrnbfc^e>b}@u2e*fLAdQnaaNj8PMJd2vt;`j;yyk zH=ARdbWgjZlNHSH9i9>X?b_Y?panCVtc3?%^ zra1`(@7cR%j*VmX>PDOpafb2%dogpb!Yo@FuY5<1$OPl%6Q8}RA+HV>sSZaAi($7F8QPEgVluL+H0n+?Bk%ys50;{T zERIZF8#^x=?382jZvH(TtW;Bys_RO;k?F?AZI2eu^ygmV5&-xlbigNOL;YeDGIs%m z5W4wMDspLX$khEulXarcY9z5C`v?*VjTz{inS&mv#tC5p?{f@s7Gk)kmLIqfs3gxB zZL4U}Y*`ZwJ;(>xV(caGxmBfpkI+OBQ9;tdX}=MW?E?zkVCQx_8*92nfecYmiQ@7e z>4Wi@-tB|s>6>PbDX}JfBy3ifm5aSE-sP>ssCER}xg7&H)D`cnbZ35_-qO-0>wz6x zIPRIPPi06#{dm*xbYo4|j)@!y7GOLL;i{F_kySVqDHM)l9%vtUMI5N}SO(H}zHX@= zTCKBPxLLu>ySE->TiKWUX1|M&M7bC93$V7!9SI9I^YSOx-Lcs(qy{ImeZ-0gWuXKm z&_b!iexVt0AM#wbnUdz7Z?L9(sz||A_-Vsv8JwA@gaAYAc>Yb?x=9S} z$?_X5>eOOOVez;aZS@$HWBAGdKB*lyGujZr^Pk?%Mkkp$J(?2BNC=tVZb&}|N8 z?dPq3v0b%^8 zt}PtaOvryrbtYIBV+2kWa}TYgv*vBBgEJBQAjyDr&rMBxET z3tC#oQ*V4kT*$0Jk6@p@rpH#MYeub}$fG#lvm*y8bHd6MYgyY8fW)BH`UoRf!OCB` zseX$o!&K&6kF47kE!`Z4Yh8wl6Y0EQBnIj|WWS#Am)mOm#iz(5DPxh@S>0cB%? z6O7X2Ya7_m_^6bLU18j=I1t7BLmKXpd@Zi+bg%U23=VDjXvoZSn((O`!+cdICiPO@ z2>_ZZZ(0YZAx_>F9l-junDWcI8nC*#N++HT&M#>|XQtcQhK}|Vc1XUJ>FqLNZUqj; zNbP2tpSL?%=f8Y?>mT2?jF*d^tbREfJzjs{Xv67-@@7Vjz0eL&wagfJ_n4RFH73k?r!A=i$}5 zd5`fp4;rB|{{Y_?(!v>Hn)54+zYCedn0e;F`YWrNiWU zH{&ddVotMuEtI=vFk#xi7`nTFdxHpdgS zKVPk;WiKx`vX5l^g+)-p?6-WzT&1_pg&%7#DJ{Co>fDoCbc zpf;SAE9e(K`CyX59X_J&1Si=&Y&?v_)}!ALUVt?rIHrjvvGTL6?SGWsRHk6l;sllqziurb*3fzPUjw@raYuDp&zQ3eLhi17NfEV#4CKiLWuY~-gfIm#R)!TBEArd+rI0f7Tl`p+h!t*8>?ZEu6)ibb z^9>Df0{e+xdJM3+t*wpy70w2y&HUyzbr-hXNA&Nv-&>b(^)c_jwt?;S$qf13qBHB3 z4~3`xav)r6Icx08~$V4zTxG9TE)*JKv>!)%v}JCdE8htBXsl-J(LN0i3>L_^=!nKwE-oJwZ7=lI%LC;VaOmSd)` z6e;Q)M`Z@H1L$5YafEO47LLZCDyePt5SoCjMP&61JPogD<1DJWd*N7R&}riAnc$`l z&eJe%eoth9{(|;l=Isuq?{r!GBs?7U1T*NG?|RESFa>a3eI*SBeU$X;$u2O~ zd{;_m!bt!rB>lkeQAUEf#wHmYtgSpihUkK>0Cn2LN$5%htB(y$$%oc)BfEw>w{W15 z#6of7WK|t`p5NfdDUK{uHqA+{X<7Za@~%Hfx5hURlTd4+RMknyc&=rPq{ZPD6PS(H zQ9gkGF^As7(gXiNx#@ekrRY_B0!}U?Xmyn7n|rFMD+-hD zsAP&avsb;99%=fcvcmee4Z1dlgtNeKX2jY{z z$1R>e6isW_S7zMnc`7Rxb<~$G@OgXQqXwD>JM%t8skCR01tTjW0873GD_{L)Qpl+X zRlGT))zW4DexQDR9EhSfY|ypB6fbPY1pS!;lBU8HKk%9nA0x28g>7(mb=S9;d-nkN zh&*M1z9p}X?~dNjL|JVqf_^Zb{`@tPk8ei9@53q5(ZBHV=Gq>)=A=5J?X)il*`nt6 z%=}a3{>PzPCpDVar$=TmPg`J`tPAA$Not@_5apXS9MviMAi;Mrk zqVQ<(JxLZsFJY3t18Gmv1@X9=_?2LUa}bc0oy&L{_Fhq^iFCg{zQ_7J@^jRCH0(T_ zz5a5qctQ}>8*pYmC=TP0KnDMDw5`LhH6)7ow>mG0a(ba;cIl0?oQ1V4?C<0KU<3G` z8=7zZueA`l+9Hxmee5S8Lk)E|Z1!tzT1n{D%bdkVMNAbO*)Gd>JrCqxo1NdT@3~i0 z{IWuqrJM~-J|DWhejq0B?r;zNsQ=ydaSS?TSavs!Q5?0ehM&H37 zEn?mOavHM=*(Zx8nx6F@9zc^+g9)pnQmVtCe~?c;6pqW`{~mq={T>pdh;9YloNa(g zf|~S*Z?ML-jrW(;CLw>@JgblD)UvtQMl+Di7lrqpUN`sXK+Cwy2a%(zH+cd3h_Xbp8U6QZdaLzgGqNKfyLU;{}_+ zhK#34EuE;V&XV7!wkX1bJINzG^PLTpZ)gt-9`@4P1p7NV_MNmav&S)bW@tI0=){^8 z!`OO`lI@(}-6vtO9l%^*lC<>e^jm2M+bHo^;(j(EyQPGdsC67YxLP_d>H7}Mov&nPQy%u~i?x2ylD9U()$m&pnkr z|9xv(+R%=o6AD=f8~(UM@j!+UG`SmRs{aRE|5$DS62F6wf}Ue=Y@u5dtQ5Js4zZyt z8!odqEY#+>ErTgYnx!CQOePadow1x(=UZQ&oGh)8H?cYJQtt!(8K3hHOEOQmEQK&P z-urg=k4rz*O3zS!Zc7FzD{y%o!! 
zxEX&U{g0JGUvA2B(ckoj_A-T60}i>qK=H-P`|*U+#lT~y2igzgHvb=3Po&hYTF(RW zu(KBdg>3L!-coe~PS9jx8J>wf5w|ds*;0&F&n|L+M{-A6Tt3tKV#{^MXU?+&26x7m ztg;393-yP@{{Tvk9+AFD&*s=iSEdIv3VwVBKMKxvXTv>x0Q+|#+suG@?@;Z)*R{w% zct@L~4@%!cv@Fio_hscOLMMBB)GtaS54F|FH5VA+$mX2f?1@xP$2B=YnJDYoj_OT~ zmleGm{AY!{!}-SU*H2$8Z4-XhHYC54&Jdo{c3C;m2a}$2{e=2nXBl_W>@Is)rn zAa%l3?+W2+5j`*tc*2Lnj_-nu|-hxmJ`taI!W zRs9xP&TP29_ZwcPzc(8z9nEL6Y(u>YXKnh~OD#>5eHCioG(@KkR~cz~6D0bYh|7~> zQ7Jh^Dy{t_Hm$!uT4?=quI)(LPylC{hm(Vg?z9oGJR(iBY^ED*w_DAJJdNfq%QFNB*m?DBCALppc5=?f&6Na&!k&gXU@91 z%yDejB+M(rxaxnyNwo3>; zxF+Z2&9RSN?nJgTC$|`6+lW7>A88dgom!l|JP^%d5|2$rrt%(@gr8#!o?Oe+y1La> zH+E^h`6OSGky+TZp|5(&Y`9Ds&>Ws@{Dl%$U&1&~ph17CpDtQ9nyUG_8GdxmlloOJBDB85w>pn2^5Be=sZ8k%`}1iT@4^ zyqxnx2Cp?RqiHDB2jl>zBK4(;hfvereQ71wyL`hlmQ5IUDiMq;q_lVaT z?b%dJ`}RWMWmX&XpHfRSc|VHqZRJdvc=AeZW<=ULX1Y2mg;XkRnv7UvuuB3{)SDH= znsD!1z$|WFRXPHjaa6g>(w%<@^nb+5*Q2|^{BCES_@P4N)-01@c(@5`=i2Dhh`TP#uxEO&s zww-)U%`dseI>4Xdg$Uf>s7%V-DdN`!_|Znznfrche$ti~-a9@n?E5l@Q>3$`vZYx9 z>OI7jqcHV=ffw=TqaI|Z2z=JQCZvCndtkVyxv=SKEX^VRAm&7IxcYv&J|o!cTra4m z261Qb9c<#Fa38bbhJ?sBn}-%pIeUzSh&11G3P_ytnQ07eG_35gL-3Az`?-xgzmK?_z#QE=i&I0y=gPm;9X4c%m=mr*J6z)08MCmpdBT@?N`eq?anoWa~@jx7CY`lDW45+6;n(W>zOF1|cTd6+1V7q6U5Z-? z5Wb-FJ+%vvS= z6)~OiXL!B)%fqkM0Qvjsjj%eh!^(PnwaiDJnaC+@KWVbOaALpfDwt4LXNqUfE)_v( zuCPOhQ)F$UE-N)7^i)?tRvAD~)Zb{JxY{%G_u=rMl-q@%ttqdd%a80ejDLNQ8Vx*! zo7=~zQE(*2F-mvdIuw}n9wp4BIp3I2{^&e=112Z}88{tRHVM-^x1cKh(VfxPP9BOvfNvZu;{gym-s^=fO_BW6VQbx`_ZM$5FYS9+04rVewd68u6 z4zRDOG4mZ-hBJ~_qc4dd*=mVBy5LkPpB`W@0d~v5*b`=P>WUfI8rFa*O~osAtqT^2 z#+*-(R%eboxV~i7wEkC}XZQ=Lj6awW=d|w>1s1J4rUT08EF)h-guNg}PT-#T0Bic7 zW6LJmVgh0DOsq)RoO(8Opm`;KX(BbAc%62yyPM7@J!0hb663<`XRx$8-E5&YE^MjB zZGD{>qk1y0q|FI3&@^n5dIODlng>%$7d&Ed4Q{tGT>3TXA{^aOHPg!%q5fpr9v@DO zFHhIpIFYxXwWryT$r)}-Q)UJYo~!1&-q?L)Z*`~T?cuhJ;1uy0*6(ALV^rHm<7Fe% z*{Mp}mZ6qXnhFdXE9N38ew2thCro+M`dlp)4b)I6E0Z{Na8NWzZ1-X_Bv{Y8mFj_PmQmq6%k8=hXv*N86^zkt(xNkz?QrZ65_NGjUj`^l<=mFWBF zx!_d^M)WwJNpir=bT}UoA15vHNb3Ex&o#&v8`{`~B<6! z#rjy~&vE8hAA~U&_AZ`%id~5%-s!tBY@1`>Mr`$^4n-6hJ^4q5r(YSFWLQ-xVaSz4 zBXO5yOJ~#Z)|n~TO7~G}=QB{rVs^BF;iATftHQ++Xs}de@6sNkwvX^_HyT=$r@&Dr zk5*j1ZhAZD4?(V@&EzvDg3lKP;jOPEq{D=rHqhEY+@+lhKr5dvepG z^aj9{at#~V<@&R13`$+fi(SR@c|V!YiiGf`T$s7^tK_(gfzAh{LN_iVElh^HZbE z_w-$yWUlWcj53-7bh>jx|J7zNP`v@K>Hx0LATIATdvbXhThxcc?7s);rxL9FTAr z5qvyIWhs4$dZ?`}WJb5CAM=O-Ciy?T?$}1dHrnSjEjiWWfev1=cOAJ>RR!g)oF96i zTK#!O(?YPW0c;aBfe%`AlkcPjuobj8O32yIo=mIdatP^R^?Cr4(&g_fmTzKS9^ZgC5N!%OV-M=-?L<98zT`MSKa`Q@;1T z;P(47Pl>+5j(NxX3gI(3;Xq)~@14-L3iNaKqpFC59eRkI)WN71%0sUD_4{%j-l^AQ zrIbW9wWSV_ABfbLm9?=~2=W1#3Yjb;h*~(wvNrznxv!zviXeQ{bJ=^M$S_3@NH|{_ zu37HR9GsWc9&WJ!Pwg9bE-kzp8W)%U%es-zlPHDurJ9-fCH?tBQ6OU=uMu6D4|ts@ zg&2Z<9S@C|($`QxSqOr>^L8T<0x~7EHZgOOHCR%qginU(%6@hW9qwZ&sF!b%lx0JH z)v7`MWsp!1V#sleod;Mi(pszpM#&;v){_b7-g{^nzO-Lp)yQl(>IEJY`q+Vc%T6d; zIB*scv!C4pd}&fSwD7(qjj%sZn23#=dAG~4)B8ZiwP+X&UdhBC2#uB@1EX2?k}?7I zc=`(LtcA%z;E6Qil@kHypy9j;YmC$nd&IE=AyvAi)>FhJ^G+D3XTEA}e48sY-|{Ir zDLp}*8kM3VTi^E2MXkNG)Gp6!Mq?B~on*szybL*9#*u51`27u6Bf>Wc-7Du01%haA zd6z@p_v?1?D7PJaCQlsM@Ot8}e~hmAJ~Kbg+>zq4KF*wd6x@ahYt)Y*)=v6{(c*{s z2TMH(_F#k5c`VmIMPSq49=CJ*e%5~EGX2v)Iv@h9L!yhUm0dKCuMUyK zi0`m+Ncu?^=3rFPi_*@Ml71R<3R3hGgO2x*d$qe!(PIUq(WppJ3x?q&SHb5I`5S*? 
zGsrx@>W(f;t%Iee2tV#+>tfmPu<7A+eO3tS7Tm3wkC&cCt12vDR-Q6(I^=HUUIJ?z zQ)wUeNap+$t9jTJz`a$+cw77?c zjjf#8L(2hK2f5r23Ftrg|Jo)C1VO-aDc=@J)lWKE z$CbCR)SJ*9Vxuuu6jiK7OxY_#>ra%vcmcnsAmQ+(Q1gXkjeTn4%A!OIZP-nzallPRz~TQCJR&j4~kF*f!z+w3ke4 z?YN44au&^jKA(I+aN1|T&>6DLSDH5Ri<~8AzEn(T$EtzYJjce5=>nU}AF%}K_mh6- z<9UlDZtBBrM2|Maj*KuU8C5k$GTx4v^l;Ljglw5OOT&%~GBmGZa1ZDHJ#psGUE(jC zk7d(4G1`h!Q-2pFw8wl`lBJ|~c>8>yAisUio)J?4*x{~JyjwHe_XGn-{B+_8r#@;1 zY|XbOUhHl=8&B#}ct$P3VdnXul{O*luT_T9RhvRW^RZ#&2<6okI8ia9BVOdu6c{So zPn{G#o2==Y{Kotq$6F_H>nTzAg~6gOyK^hI93jD=1}b? z+oEjrDa%l^bzr*VVsrImigUTJ$-R$)UpPaEGdCI^Lw^xK+)zr|6d~vOZz1PUup)==$py0uTMtjY-{hlc5eRj<(DsSu$Y&7O)rNZ z5OCQ>yVKqk^rD6RC%~aQ#~!Zct2V>j;na@&xn4qG1@4lm=z7r{t1g;cpkqANz?~D7 z)HUs*s?R&J;`ZJP_OLsv__40h2(m$2D>T(tQ4>nR+SRm}@}GNjpmJ?aIuh3Y2S$rr z41_fY&=D*1k7e}kz8D17h+%**VW20hijk#gRxLM2-me5^xu_CTd;ftPjKbJ!9^6V@ zc&sba<)Gl6!PV$jheCp@8xN*C zkO{WpAaUJ2=|*ltC4W&e+dUi(lLSSOxj=l&q3l zUke$|s*mb|a;PCl7Toa%>6o~Uy4BLclA>*`Lil6!*bT&Ic9>&GG3C;uJ;n%{M(r|) zdr^+twW5{NqI|Q+g%$zVjh8Q4-4fw_n(jJ0*+e#(MI~mImxv~sQ&}atj~RcUSIg9; zW*uz}ffH(~bm@V@r4Xu3OIuq`@3c3vy#MIi_q6LOdBr^leRx%{kA|uU?vctoK&6M7((hp?-ib^~1Dl`FYU&M~CH2Q!LVWJ=3|`5u~5$1%3$C@ONYxNOZTU(3Y_C#3}J-gI7yF#J>n> z!*@1pn#NLd<_*y!6KFT=Hz7-;uCf`B#nle>f6q*)Zy9o4{VI~74K8r}5bYf9>?3;b z=Rk7^^y+f0*MMI0)Y2df#ip+=H@JsdyyYJg3db&&n#e|DNI;}w;f|Z@U-9xm^CHf9IeYmZvUJ#tB`P>YC1>Pnp!L>>5o+PViqTF$Q>lj_b{Lm_?mv3Ate9613?XJt;-cEZI$ z)z_>9?StfqMakTT^4@N1d9r?ued5UJ*1~Rbg{w7`vyAb>Eq&x#;dB7(si!2`J1di7 zH9&49>*NsmU95U!!3BFeTG$fA8&H7FZ-~U7fZn$ za+p_!cFWkPE+1d5d5lY4wR->IyuGEy$DXVvYa^TNey;!eO_AsJK~pXXf>x~Mx=C?z zOj%BtQwTYQljy@R+8ph&hk6*NCHHM7mQ`E3&}68kyP(9#ZGyuW8M}AJH6@w8@VQTyHu{WFMdz@ewfl z5_n)0UOlYCUV#ze%{bRSbpeCy);v132XKvie2w_x_9jvx0VlJRh0+QK)HG<{1#|K zbNgrux@w)oL@QLoT-`8oK8;D`aKvXPzT9H4jgS}Wz;-k8W~}%p2sIV0{Y%H#E+i5D z%8BF|nllVF}Q zHi^7rGJvv^;5P9@8BEWPFIbo`BSEL_1b*_j)x}{s%J@1AVemOd$!JBR4W*h5M>95!g z&{nKEim)`>$8tSod9mK}2eVeZEz5O~eC9!||waiO~w3Lb?@pEibz` z0gh@PdEhcV^$D?yL+q5wLbLBaVR&Vu@n?1pWyt(Sz{Img@wvdnD$(fVVK#2?oa#m( zs)(kOo=XIZj~5$~ua=R472X=7(A5kcKLBeztu3cL5W$_C?2_yBGDaV*bq zr}0OtI1rY?VCv`v!C7+;3eAE-Dp~uj{E0Oniya)~-0dO&fR_ym({D>x-R6L(GfYg}4aOj1<4ESUCVW_)_K|8(fGF$`J%|I+m4 zQB7WJ+xV}?Q~6pe={cNAMIi05mG+>Z0x~A4MWwAmTB%ZI6(tNpzyKlhsZ>)OKuaw$ zB`TGOOtlPwgd|0fh$I9IGbCY5NJ0|GNapYHeShD7tOdzp?PouG-}iN2gM6L1+rK@v z%lSQg1 z(pq>-+^he+9%QtzcF+43fo+uT5<|j$@A&4LZqfF*6d^Oho}*<=g2P|h#CFXGXdSvf z&uK=~Tj%#O@?tEGj+BN_NsO>cQF+uFULR`8=d4LZ3_-75j@l||(NI9>A`0oc4s(VG ze1cH4I$ekDg!Q&-Ygq3UwK5KMTA#1)kWeS=tD{UA4{(cLT&V4dl&0TK23ix8c}vY~ zK(5c?{tKxH(ELc7aOoe5#HT}zRd2^}U7;a%kq;S}ex%JcMXBjE&V}}D45idHM#FmU zfr2}ywlKfkokjImwUCQCQe1b|dWak0ngTBp*db-8&3-PL+}m~cX7_hq4$2y4n(Jy)(u7Jc2P;& zFx*hFGq#uG5K|;ucdD!bIUF3MK!jG6cWIpWJyRU)D1kbd<9$!&bZE@iYc#+`OYt)V z6SbA#Uh*Bx^;+1L3)_)misf7}rM_2XpLFN^Go-GZwC-pNe8REaYZK!)Ps%uTrE1Za zCR6Xs98i5l1+q!)m^eqMti3)zeR{%nLuqaufmjO4;!$G?S%7}O;T1U7QZsxD^m=$4 zp)M)u=#w&JJ7pb3oiN7aMqmBR-SpLVs{e<6vl#UMP+tFhZIa`7yOXo4Yb<(zi~-2M z0b;fI@%7Cy`0wu+TgN*BSzs=Wmf2?i1ELqUfnp?z+_7t#2y})Y!?PWQcFX|Mu4pBK zFvMX*A(FK8qBF2T4z`PHL7cE!iBS>>-p4XB$gdSYY$%n`5Q-7xh+opT=~-2_*tTHM z(zz>VX`PVrHr;6@x(c9Wd=&FpWSZc>$h?aS#$a2FZNs;emS1=AX_tjm^Tr{#L<#3d zkyUAuLd49;dz!LJP-_y)dC9JoQCqT2T(IU9>DTj`ra>aC$Sy~a&MeYNGT&e)S7Po2 zLSUTRL=wjY(|O_N#VL|2$Pm!&>?b$j_)lb$fYaP3(C`k$PhacE$HJaG=6 z9;%9l1*rtKLu7Yt0~L2T+s^~q8481@B!s4KowL>tKMY5MG=Efs1L^1ukH{hi)zqi7V1@ETN)b)UFQ|mGTQATtoO=|>x zn26i>4VGT!%923PCS;08z>{Qnm-(#efrPDoSv~0K>hH8e)as1Z|3F!;(PjoQP-5rwX3^=E5m%+M^2lrF#s0>hCZ|%)-IzO>6))3 zV$f-bq*QDjX8zT8QbP{@5Cp#LTzUPb7U|7%TH`$dP%-#*CK*wp5>gSJxe*7)apoquE60^CALN~aaE 
zqIuVdQtwi;E6n!eQ8iUzJy{WL;)oAN`e?F4N{TB$T=yfsxHM_=YY)<%6O`G)MP-%X zTMdP%20(*pca3A_75zPbsE_6<0p^$m2QAR8Vul)u2Pmw%)23uzQ3BQq7*BpDY`Kz{ zM%-zH8gk(}Bj4+Ab@>rn#NjZsC9@$8j*K#m{Zu)I%e(Rs44Gz?$9B65=KlG=t_t=(%+kYUI9}uv*{ZdnjZuBo6lVSl~(cm_S zc<^nzM9_578ggnSj|vWi%V2ObaSR|Ih-72@j$E&P-5+wJ-s0P@fp zdzEWTnd&DE*L;>$Ajkmte22R6I*?CT35*_~-5(ur1Ojuc$fk$5OZ#?QAAYz~S`z-j zP4MBlsj4PMr&lJpECv~e?KwFAinG+e`f?da&J$GW;AEKSn`Kz~8cY^sSAZ#c}7Yc0QWlxm<3mjQEp! zg><-`PDP6Q*=#2B6iL=0ZB)X0b&EA{h&A>5U+hC@=#>h%v+YxYes3yD7YWOA`2HPc zMx}O}@A=m{WvQ5}7;*W$WADxan87p>rs)fK?9`BK?nvjgP~ZbT&QV$}TQSRSvq zJBN@GTQJFh3f34U&&MSvVdaSDUh)h`nhBF)8=w^Ok?1@0LdApjr!>F#f@Gg<{-Cbh zIAQC?@rb&1U9iLwPtT3x3I!6wNC{HK`x@-!IDMc(eX;r0kYN6de?WUO-<`L4>S%ad zGXCIU(S%4s39!ws3m035_^xU~gvXg_& zNnxpuR~m_p>|BLY9k<8KiIo;Tb4#m&f$d;>91Nvfoa4mjKfWmlw+@r+?zxUvPTNho z^K?(S1izSym!EZR_#<8$TI0TBlIn5WeK^b#jUWlMTF`*_U0zkYwb3WP8@_pwVIQ;U z5tFEymYPC~vB3Us$4aRG5^2UB<(O{*WU*IlAr}+CgpZkVm zco*Mh#%wR|rEuYH>)3SNa`_*qlmZSx-IS_&{7#X`v``W0&dz3h#0mRq&%p9piLv(B zvWrL$_)_ET-+4#>p&>Vo?!jaNdUrVgA{W?s7n?Hx3$6EVgTZoT7+`aH>`s1;&Cztv* z0J#8O-*Q`gH<}EpZV8zIsshO*NK$VKl1LQxHgU4m5)RSMRYeraenJB3cDdyxIJqoY zmTT8}@Mq0gGZL#p12dgGdSt{(Ts4wPf;bV);wtpNQl;j(hgz6RO{#aEw!!&H@e+7u zHRiT@$#qoZjk#qu?yqp8a1_#;nci=FBc%rwWR}O-8)(yGfG_4Jt!*!3i zou&*}-6^}Ps=CT%jKpghYJAn=78Z)W)HR$%-S6P7KOg#06=%A<${hoLoTK5ssAO!8 zkAr6GIKK~TkJ~CKu$u{K&k*@j8~m}LYQRx&_8)bK({QAzqpmbNd8%V^UbE&WJh_7p z2&YO>Bumv}G-ssS`u2rOmml8z;^A0{$w`Lbv^4vPDjzg`0;{Q9-^O%-Yv-N#nG zPx`6lO#fa55yJXG0^t8ZwV)*1-Pt$%256Jw|hg46AfC+Sy%n(5l=(gwMfllQ}ZTRmLT48zB zSeJygFKX_?orGfa$~9H|L~JmQ0*H6)vB3tZ8_kb^qWD7bnjH1%BB^#s>6wCgpVPmV zu^L3zEnGIWEbHI04+pokP)@NYqdM3Ry`}0jH@xsKg#~ z@$DL#3WF-gIJVYg-sZa7HM5g1cTCw{)r7Llz$j|>*2R$ihMtnsf=HNIotQ>%rcG0m zwOGxZMZ0Ii!GcLXIQ2LFfB2Uz^*On#JL$SP>aE67c#*;rl8PHgteO$Z8j@SK<`cnh zGUGdv0D)Tcg$C|Vb{zk9avlf*^il;6dohUXb@PZ-lzaQ+Y-xRtw>zl-x`_L&$NNOh z>@ycQLQK#cLXx^C+X9Wv>6=p{`Y=^Y;)8UYIgQ{<4potuTWk~b;HvZhi?(Q=#CzdNCLXD$Q zKs0Hdy0nby>QfR0cOMvAxcGZ;VrGCKb7%5FykJCOhl@W0^m>4ue#GH(f=8-=psJxC z__HVXboAH8f#ip&2K`!ntPjE%68^x8#eMJ9<9EQWa2NSS;j(oa3DxaYk;ftwwGXy9#DW> za+!lq0oZ?>V}1@lX+PD3Ov!+S!7@Kb;>-P4a-*g?X`9hj@;0p$$uY(h1!Sbp}iY;j9x zY<+q9WRpD*Qo*SPBqe=$1wDTsD_mPSJbXCfJxL+f1)P-JQ3#2v>6c!rVZ9heK9Ho2 zTg-lNn+kmdMA)j=1Tw02!>zzjBD@ea;v!2v>loT(4^3+h2&m=gw{^CblQ79dZ4{79 zZe13skm54w{M)ax^tCqs8?3#%b*z`kBL8+{Dwa6IY&O!w@@lFnJ5_l)oDWw6GSwJs zcreOwuhmX_fQZi`64Er8j@fGP2=2PZYKREmTCWHC?r3nbj4pcu82isnq?nDtCxD#a zlZAk30aP%tdvQI7cm|#pgP|_YhC+WQmG#ybCfc#d`e2?pj{%_09s9=ocoZz&s<3tWra|!z0Sm2&PpAp^pe61I9e7(NqSyC}&qH?rdoH3Ga-3!PA9O78N?Z zn>ouoq`5pFR23L;w3FHGRM4WSq{m(Q*WIkfEL>@PTL=J*kh7Z zAQo>>KdpQnndJ`p*MvlGx&T{yJ#dn_ay|;#IJUu86(u{{4XDIk^L*Hbu zvtiO+23f}YH<@5P#;1wPtl20a$7TlB*-ooCWCsX*euT19sNR_sSu&cj(G*}eYC8CB z9U&jG!h_w3M!w+|Ob2c0TRvCpkUkd>xz{9h7eC>=F{wSj)oM73R$u=AvD_^=KvN^? 
zj6LqHL5g!qeMFJLo(DLd+T_eh;oCA|wRpy|GW3(wGRV8wmp2x|Uw+W6p2(AFygH6n zjh>QZOit_8i}q2WG!1zNoEjD-DW=Rj@$ZsksVJvaCCge(6*YzK26*%!ua03z+oo>x z6*&NB6aqUyf$)O@=N&1%<2S_PczIqQ6y$jK*mke`J9@sa$!kdlLZY9C?8Z)4G3~xN z-NgXZt7E_50+eaifT)(pCP^Z*2kCcqneDwBd+%g3E$?kD|^Y(UUv*;5&d-qSx7$N&~pWDl4aJMIQ z&FYMOQJ~GfE!_~$v#1D{E@F&S^%8`nMu0nPS%1k*$(YjX>%wH!*z}q6B#VeB24Yk4 z(6ms=CH2UZk&qB{!w!UooHS$O^Gr zc-D$jz=Ahgk5!cS+{&1xvV-Jxb-CIxcq@(xWCJC;)%TnTCNzmtrhB@#D1ch)lOJ~+ zg5R1P+7I=JSl=D}ZM1uD2d7i9vT-eO^)>rNC2pH8*(c|U7`A>p7S}?$_bGbhwTbHr z;O{0`hJ_|8nKd!Bu0^%zHJI}toAQ_%PcBsmA}HvbGwE)T-oUli$qYOY48Gj&2r8Yc~t_ zl~JQy#rCVHNsR4w(aRsqz;OE;&VDN{s+$cL0BlXAInyHm$nz0Oyba?U>6EIpC<_qT zb*LsGB9Odj{Yk(r1wHDb9k9s3qg*1*HgP}8Fd*&WSH(A zT4sHq)p;djNn^qe_bClaW+&E*>L{DR?o=oWoOB0dY|ZO{$V#fr%{J6vaHpsVZRPua zOj4dr5-?LT_0rC%WS6_d6OV<|-)M z^#f4Vo)7{|z>Xc#*#(-Lerwj8T$w3(q<17E8quJx!e;3Yf9yB_b#AwKVcNO}d3wPF z{aNqK!NRI4L<7-r1hhL8G-J&jSzX!_AZ#+2N@cR{0Uej-8fJq82u_JCUQx(a*)=IP zECd7TT?wXQHsOARn22;C=XLEVI*g}38lMbL*8JQmv+)Z>J&ko4D@*BMc4$vgb?OqW zjHs3Sdzr3Ji9hWzK0nDkOu8$hnh8@ec_lf{t|f(v*MFRJ5(=H z-8X%u8ELBOnR#q45ZD1972fH}O9>xG+q~b6Dxazm)Jgj?);0iMFpi`HEC;OH@jgu^ z>dY8=KA1U(jI)m0EE(YX@-_{2rE~d!nDgU=HJINlp;-nbH;Msmk83K{J0Qu_#0bDb zb&u+kMK|d|E!m`wf@I-bi)KBqslOsAs3kewwgve4iiD6(Nvjx1wiiXEO+xVpy{qLQ z|C=$c#Ps8`QzV_@gtbNq;nOHsf@W24%BzimaqwhAmkX2e=MOqiTxtf`-|KX%mzFX@%6hT?3n)XWZk+Dz7wfocQ0> zIG-kKw#iepSQ1gAfrEK8NoUwsmFOsL$&M4lfLz=d;9D(yt%=dxx())+K10kDrBx*b zrK?}&mWRSS8VMQwaG2j$!2RX?k9{VeZx;k(9S$4irXRixlO;HxuDL-g6+8s;jV*&! z!i1n(MG7Gz#_jjaTrCjY5f4CM>~Zt4sBBQHtT(W(CSg0#Xsl8uPR$f&z)0^$^@Ecp zHWy;Sb!%51BrlF1Mcp?mjty7#Q2d{aB<<`sR5^H$I!i!Z!GGA)h z{iQL{I{@^m+I1iKh-W$DPy%kxa;B)U$9@Py5g!G9C4_GiA!z+ViT!VfT>jDR`7DnR zp*rccb47Cg%GP-lzFS{0%)9$m;>q?Uo_J}P^_C|28*w2@Znc|k(KZ2B$EN0BWMk;}o*4a(W5mK@=d`;7-%n+gnZ4Ny7Af?t=AY!x%TFLCHNFlqG25lEZ>{Y^- zr6~~IYd9ze@Jf()@n(ep=~@OZmLxbCg6m!^}{1V8L8+*udThT-GmUQ45_GZ z2)$TG{BZSWdqXtqWrs5n*C~E>-n+r}cB-TJ%JZli&`x!8h%<2cP(SQ3Y^G9;^smL5 z0D|n(IbjgU?`7J)lc2eh5?^?C@%qoZxa2nvlJ~5#A#|prs`0$F0X+g(h(%e=0BW1} zamhKC`2X%O{tXDmI_%?K&N?_dQcXaN4w3mn#AJ{v5pM||)xhL(otp6cF$EaI6icRp zh}Ed)W`wnWEfe^rszL~)7rD8KTv$uCaJ-IJm?T`&-FU=L!Py|c-7QFxLN2fC6~cln zXLhw!c`sEw962SLSu|IdcR>`BuYc+nRvBWFjH8<=S~^=ZRK6pINnb*I)g1;Nr=?z`N@&zqsJJ|JFO7eDd(G#^=%>$=^0c zguLzlwYQcM@%vY&AHI7!^Xci#8~5)_fBWrQ?+Sa~`diH44xZwFw7(h>!;frhn-Q*dL&&W%X2ZR#9MutVB`te*`W#Z@>Gfmu=oT z)}@|XFv1$M+;P;$Fge|3AA4nP$jxdXN-ecl(~mQ6Fwu%3-NP4e=R+!YHT+h54X0fn zep)_zOxJI6#p;t1lG02${^F4!>OBLv5@A26A&H>b} zT*WUY{2>nZ3iH%DWlE)ybwvr>Y3c6Xp5T>tw4*Vtofo_@cfd;->%ZbH%R@dc>I%!B zT4eyFWAViw3F?0J$Jg82l9JmtvcA22DqWW9KX5k*r~)5(($;+tp(42 z$C1513oR>p)U%j4XVafL02n1AyRFP0TbKsQgU&7wpwHHRCdlpL3ptV$KQGd^!<`Hkc)0 zf6*QF2pxz-S1~Y+i}I_Y`ekDf3?j;Inj{i{j*qZj6-H2OO%|1_XV{YqiMS`htSVqz z{k~9f0dzM&IQ9wEr*mlde0JWqNJ8Qszc3H)*%O_%+M$wcQ8Yd%_@L&j+HMF}PU|O^ zA0%i8Y}&!rLeJZ7DS`*^t=1f~p{GyDd`p8cxLUj}ZPWN~jtie>B&NBpSO2gxM(m1$ zCT~F#@!4TV+=K1Oas8jV(>Xn5;9n(veH4l9>}76C7*WCN8?JY6y5`n&6mLDfg9=}o zydls0MOMJVQGsZoE_kn&mY&WH)9sTX%hd0`u8H&|(?-fAepth;yrF!Hu{L9+v~DK2 z7wXFr7x>;5-1wXH$e)~uFrDI7wM;-HK+-?Dw{56!B_Z>-Y?2y46` z--uR2DJHCJmn0u&s9_=4+Mlw4`8g|jtvIpfek)qtZNN^`b(4jXfiW|0wzdsX0Ic7! 
zm5MKK{vylxq#+Z?bG805yTXKd-LA$DgC)(IYhSy4KU^NNBz2S2Hud~CkCfyyiu}Y_ zlBzu2+c8s_Ye|F$+`S%re+I>sGl#&uoyD+3Aw%tjC2c+tm`XKl+hPMWY053CMB+jZ zERAz)H1#aPL%F@lu#QAQ7Mu%u#pyh$<1UGroxa}gwSo_02nGiu20=+UnBtghS^9t` zdsluwaOuG`;EO*=X~EhO8O9FjsI8|w`JCzN%ft&bt53sNB%~LUB_Df6;p@oDsd7fw zM7vcnW{+ckz-;MmPX2pd7D&-z8x3s-MjVH)y2^(hj^BxPpUm5A`o=W zvp8LTFR%;pv;Qd|L!5QFXaSWmBijeFZ`VT4YMR6K;mQyXN2KO=E1ctP>-fWTKRbaSzn4Z)Hd2$)Lvn}9T?HECBE6vbEcW&-A-c=+glM^P`%}O zWSFCv|03tS@-iG^is9qk@V=hH8{9JSE4melL2!Fq3H)fahG(tUJkX81wC7D zJ^yPk_utnK32*F)dv=@=RS=2|>WX=9P6&GJYT_$T0=dqAJ9))T%-z|Iesa(Ay1T$M zWxiy?f9s?KEvvz))B~h!;38nXg?25}^e&(AqCYP-Kh#u~D&!SEu$J?R!vIhcRb5UG z4eIuH6*P}ETb^{%(h9rJWV6$tPqzC|@sk;hBSCd&{F);nI8C$vvm>cu@9^vWk(P5{ zL)!dPD68&fNmVA%vM?aRU9p@ZV$)d!%5DVl{V98O;){@%z96Yku)k|qAMvwYb_#a{ zDIosVYN7-tOSH?GantsJJFxD;j1u$4G07nqK~|7;&|`BT*4`q7`v>7=l&T{krpxyA ze;k0+TIBNGjV*YiJjJ&zj3r*o-!$|I72$=#$SKBDh;WDD4`g4&+X7tIUfWKJ^&S0It9{y#bIYB6I^FMCg( z>|lk|j#m8|Jqu$EO+2JD7D8-#;r2N}xH;0YWgz=<5(L7yom#Z`5P1G+kmL~f`;tX# z&o52M>CP=_CJAe-T_H<(DMQTEE07KCcx|3yG^u*{BmY}*k~Bg?3@Kb^NLxOZ(#>-oFoLf3)(a*6PqplZ~K5- z%fr?amZUON@5LL|9|v+??|(w^ZmEJyV9dq(>M+9E%C}&E4$^1deTsj|Jo%nuPlRf_ zr7q8tF~@eAW6lyif`}MFoYHJN$SzyGUCNBo)}RQ%3#_V9v?jSPBTO2f-x1i<+oR93 zSZ_3h9$EqD31j8kfeHZ7ge{HBwZYA0%SuQG{{b<+dcoMeP76`qJBFM}GcyZ5&S zM@b9=goF~LUp|l*f#;5G&ONqw+}`Z?4O|4tr)KD@@uu(Vb56AY7NLI)C`x5xJ0$G9 zP3eN+s`Ub)GzR`;Kk^j+)<9#>K5_MNx|SxrYz`Rq>yYZ!ZkLzQLLKpMxQvBfI)q@t zK?$10XGAc=I@){hrh;^f-wa3A!&V;Fgi=x=-An0`MmU6OY_sU+W3jK%5(e z(uWb+L}&zB+Ym;qeRE}_4^>nHxBo{?`OGpmOAmCXT-{sY_vAkHnvhxWgnaJlX@w>-htUyB(gPrP0ZK@ z;ZTm|Tl4W(s*Q!w@5v4F+U*VduD48Wd;OPD2yit3;TK>2!3vxOOFqe?hD3QgYlWRC zgE7vL^8)Zv|I%CVQ5+SosHorysrSx02e%9v>@;g2{f7LUh=6Sw z4EWO8gvX1Psqi3xL3P&CD>a1haa}c0)!%8{$ENRj$ue?T$ig`+9IIz$=5VoA=)z8I zq_x>*8U4FqeKs1;3jPC2s#>dS#0Bd_T}6XKD18_ZxWsr$4yZ^w=}w_E=!Ob8QF0BVJeg+iz8z92Jgw0SHiv+_*Wz;@cM6=JHgR=2_eH9 zU4EP6BbMI=@gF9qH<#^BMoM0x`s<3esq>ZghMU&Z!WDRn1e6ovIFqcpPf|}o+ZP^s z@OP&0cdoRuY9_2Zr#xcwhs+=C)I*tN^`SqtsJu6uvw%mZEZL!W2UnTbq39kQ0D6b1 zIRtU4t)EIUCh|3wEH=YiJ>XCr?HmZL(^2Ep&5QfAbh_*{OMXO`Ix_BZBs>#VqJwCJwh9Ib^`hN`eGCnWp)# zTS=zxm0~aYkzQ}r4w+l@)Gy(~ux)pj0t*79jqN{Uv4NE#p5j}R8((^O2h2tGr^RT? 
zCicwDL1^hslZ={TJO4f|RPV5ed;K+!zdK-wm2f}iRI}R=iY(T0G#)tGyo!VO@zG}47+G(XE@v~@y$wrGJ;kMeX?;3`F?<8bFY}nMptCoGo4QvMm>Sv{B}Et|Bzoe1 zwULb-Z8AD~G!p>AghERhjh(%8#wkB34v6zMWEf(4!&*L2_Gn)hv@9E|$rUabM$w!1 z^5VPK;Uz8gJy>ny)Upd{!)2&!>1K-l(M{)}k==)ud58A~ZQ>+Wv8Y|ZK5xhTmtAPzr zPOB9Y+w%G*W)$?tR7gXrD9OA_YW>qmZ-0I1x<(QT;%FCV%a!(jBFLWBzoVA;nRdz( ze-&+q)Kgazm73%4pv$Wa0egGyQ;SB*6p&3|5l5f>IgErTDWB_ge$nFwABBDT>g6 zk{X@4zu?mI1rH=&&MqhxWHDML(K+*ZcsHkKd0vuoG$rZEX0ugNZV}^0L*iQgpBI2} z^*>Z+ID0wV&msp027IQ*a#7St^le!y_@jTFs{!tb#@*Xv-CR)mX&)cF{HCT`#%H4j zdGHUJpO5YMr}rzwZP~w83d=Wt7Rf&+05N0`RT$zL9hJ4T@C-5PHpsaVmz2%)79S!_ zT5-`lR0&AEy|?lJZ(o)de8e`_b^0M6l<1zgDLErkA4oHw>0Nrg4OFA-R`Pw+_Ill^ zM$9hTx*Pw-4$`Z|y6Kegg^UV;Rp~*n#%g3Y8z2{+a6eAU1jJ9!k@>y0d03Lkt^Z8& z&aOc`>U0g)3i59#2Ujw7EBJh%uwL4-dh5r~#=qd;x|D->R|%%b+C&)t_*$s;CitWD zkFSP)kCMvoMl$5qH%hf|@4r05hcg&eI$|u;uH({(wh;?1^f}@^DtvFK1e;uakF`8$vpe>6`Ri z-xSpms&DnM1%zQtP1>3^+OXi|dC@mhreCnKB{{cEpU$)1r>rFipLyqvfM7jaR!?PA zo8TLGz2*h@QA#PIex>_du-kGXVtQriko$=_)>%vl>YsxCa%w31?g>dxc1O2SG$0GU zW34gm)i71ocLgr7NS62Fi;X+f?Om|^ve2gcAob4R)K>u>v7>oQW_5q+<3>QOfGuX+N+q zNr9!SN5h^a6z5O<6HX~IzKWn_FY5myD?>0bf(p$JdRD~Pr)u_6>!*V>5Q(c_ObFLa zZVF9)1xF2QsIxkOuPl{DvzMB~C+VFnN{5$e#7pV5PLZyizwX&Z!+lal1{Fyr$(mmK z1rOG9liYR(xbZVUhOi^fKZ$~K4aw&PCr|mSdm#Z00Y207wQd|NcTC=rTPdY4$>p^) z#3v&Ucbsq^sPoMwQnp~D_5 zu~x3CJqu_4wPCz0iN;&ETUvA5xe_Wa**w7j`)Qgbdz?42IU!+WpXq0mg5cse<+<^v z4`@({WM$%E*n8CS-Qu9!e|wSc?Az1_`?yNiK5#y)mwn7XGmb4UYkOp}>(^vxgs2Uj zb08X3iYVX8@8}-X*9*f9lTXOQRWDs85}UTGeob2Ka!Y+3#C7_{r%p^yg^_h24tHFIW9gKCw0PvR|#s z5{n}fYG1+Xh8J(7=Ieut4Z(S@2-HWR9lJT2n=4RZ)E;Pl1b?wc$rhFG`;21WP}SHQ z8gymMC~m9}!&S+Pv%)mp5HEPN%uDBqC7S5k=onW6{Efau`V+Dhg%1h$0oZ|!h2lfh zl&quASk=o)aY044>e)1p@>sK?UwRmRgi3*Zh%8GGJcpMdke;dfqhRf8@vx&8LbH0# z-lZL~&$gwz4#2-BD~1SpzHi(E;-K@t<=&K`;9~~evivGG$meWH<)$ggp$-r2`~a|T z!Jt$?<-&4dcZwn<<8xCU_-r{7c>(&NdP(rb58F?Cl78ep_}=o7?<(5vZ0|~cQ3b9G znbw6jDA}~ZM4g5vYiy^MhnZZhjxaT%zyHjE`KMyMA{2s?W&vlk+wGJ;dM3pKQ>bm? zSdOkN;GUEiyisP5qg$@G_peN-K1o%XZ^ZRpaMB6)(Is)DZRSXF>eh32<84;OCw9k% zZ&?vrHmRh-ao0@7YIZg!=TJUTN_XAQbnsw!=4{>abB5sfUQ zQ?vYZSfsaibJEZQ+fcC0%jS9|Mdj9n%mf*X#0>p_WWgZ%LOmOfa*8iCVBOmXQJfk% z6`CICb5@DT>g|*MB5QlsrgUxz-$9N9iT={yG)g~*dDH}x#83VCjcjbGT2t)*VQQCR zkx5HFo0M>!TD>9SEXOO^Ey@teM5%Nd&+O#jQj6GCa-SoS{y_o!lJ*fe$Uw%o1P4^N zI4l4VD?I5<;EHHz9**(#N0alnDg>x_k{)!0s8R0r4}cNF;hbK?&ln5wEVdBZt)=ZE zVjIqaELC8fqY>^ccxG2U(<-0MCg0X9x%{7PdrCcGHSGzX6h^F6Iiv@8kvr<6Pfb0m zCYo**WvuTmI?a(_1Za+RBjzgK3kPMd&3_(49S@69kM1jnxlDN<3d}}6@yD*kKCapD zK>ihu(&+5P$g}qgZT1Tsr@C|2-yp^0e@g{#Jp}DQZoG?J0NGpHX3*gUPP<>%1;<1d za6!f;MZ|CyvA-pB514*V743mH51GQeZ)^q<`Br@!>X3j+-&li zrMbYj72-7)R4cX>T@6%-5FhCJs}r&pJCCLBHLXt z`e6UH&{>-E81MqtVzFUw)?Smq9ZTjqEJklHz%g_a@zH*lDvz|u2bIV&?@IjR$2EmI z8np~w$5v)_n69>yL-}N8C|!b7rx(Hr2rp(hd&Q)f+F|sxm+*0dvYq=X)$iHwuh)Af!&8$7M54P^NDpg-! z=AMHpvX0KLmyjEMCff~503Gj1@QwQD%kYQ+Kx;>SDCPe$Nw`dWUWMm^L<S~UF7I`l&4IaJe! 
z?*BrX`>9$-hF;EY?3rq;2Wj;w-SL$)D6Y#VtPqdOv%HPUSZK5g>(HT2?B=$g25;=D zfY%m=-b$GV9kv#T8O3w{xrC(+yx-qpou+Cf%zA{>#sd3DdO@1YH>m>lYl9Wi5EtxT zl$zAx%T`)CUH|7O(NT1M+Dx_tuAAA{6E}j}qywK(7@?_N+UUBZI&Vpjxeowc4bO+p zZ$&y(bi!VurNrMWgPh_q45+w3&GS=kfzzRyMTM-Nt1^ z#12yvxY`Fo7QG|Z4JjB`raZ%y@@vCI%3It%S*~_}Po<~n*Xd-yf^(y9oZ0q+WacKw zTPs|g)%!T~v$tr#a0-T(uBtk#p%_ z^lnmz`F;yEWrHEDK`U$%?K7z$kNdlgw4C*@I@VRf?9^;oeHBChe<~w)m@X0?`YN6a zYp0v_!;9pCEa~&sn}(ly6&q!#Ki+@z-XH&pd_UNIz4m&WyWBgVfvc>i_ZBW?@E(=~ znr>LUwR4+yr1@lMIOUKs1<|&1z?%}XDmoS*(Y>Pj z{ACh0r#Ba}D>(xmaCZ+FT0jJ$QuX8ygwm5IisM-)~Xlw23XJ@_sE zYCL0~wA{6_SULJv8e^P~JU|j?uuGHEV3$o5WW1t`BrSfzonjWw%|6*t493iwKmjrRr;%arZM6xd5sZlkY_)jmqZ$a1et z6Y}LKpdtVGo&5t5hZ1}`x4HtG5<2vjzsKq3Bni*Vbv`KuTF-vPrL*7GrWo0mCERF@ z_k4NyyVAdQYi=S^8(?@2$#D*<|CU;+5+Yx`n6e$GCR$Thg8MoFquSAu=NM9PEg%*o zdlz}ROq|31Y&uH#&R4Mye++rf&{`dCuQ&d#&Uj`Yh^Di5&fs_t$z=GXh|C#nA|un+*U!8{(fRE%;Q7Cl(__8gkHtp zk^bi(gs*UPH{IXFtH(^Ersb+Kk>nMuXGJcwC#FsAdaXE58!Toz&$jJx^hy4jv<~lM z2miB8OhwNZCe>S>MhSq>Kr}^!C;W>1i>W{}E$pvZL70Y>jx2uT*mi#Z(6VrEr)Q_` zvi)8IYjNfFg}PtpFbpbd4BqDL+bc0+jrjRv)nWmDt*0S4lr&W>L{IGXR6T8-WQ9wS z{(cN2In~RMGBNR3iLI!%3Ae(Dk!B>A~qgGTx*G5kIbg8H1gZE@Z0!Rcnf0m zy08Wm!%C_{Hqm$YO={t(#nkwd>iy35j@{Vx$$EPCy_nlD_JFN)wWZX*8k13cT2rhr zEL6ZnKy^zaCokCb1W%UwmHnkICA}9L7ud6h(vl*SFyc{DA}}IGFh8|RLXG&J76H<_ zk7tqrj^lBfGe`#S2l>WPN?A?6i{a0>?i$^>+`qzq6#p2J`13>lFS241d5QH0yCaHf z44%sG?B088Q2jX%|3W^;Y`ze-EwNsc?+%xWcdGD=nclFln$g#crW0=!r_zgGs@35M}cC zO(Zya>a^gya6vBVMO(V~tf=e}O%-aHKP$G6BuJi7fL&SW9Iun+`}Qj1KDuxjRo%K> zkbYgKJ3bY(xx$oW5vRg)bG9fFMpK#9d^n(I21RS4*q@l@fC~O1NwS0Q!;XXba~z}d z3o9$wmzjoj@8Hb|$$zby5xd`puU{vSftFQwvqlzoQczYQ=m+}49oLzmZw}<6`|7Yx zv3uKmm*w&%wQ+Z2qHM|M+ti<>5uoFtqDQUh%be@0hB9A*F9ldvmBe7 z*DLnuAO0wxf-nF@0JzS(vGly@YaaFfzUhF|!{2GT?YOZ~AXB#O9;dSum#b~k)_6q} zv}UoPTw{4|ryEJ5*!{e*pHh;Iyx?fP85iD>()*epI>bbBZQTnu5)B=ZxEH?mWY`y{ zz5}LW(c1C%{>7ImabBQz%eJ@}0s0}eQTdL0IMhY~nX0^X*$(62N zyCa!@;E?wVA^8zQFNCP?!i5`sYK)u2xpT#<7^MEth|5%ujyG1g=;ZNBl)W)65KD*8 zGmuITo;5TSIfL`o9V}go39-$v5^*8K>`3`}AI(}DAWNlheQ$Vp$Y(k~JG_j;iWm!& zPzDmoK81%0B#kpnneNFZklit|@@S$0)YX+%_{8+?TxAH=_-R95f22KQtE4Fek`ZSg zZ-C8;Pe$N2$nO2df(YD8^Eq5t1NEiLoG{;-Q4%11$3GW&A;_^72CxDjWpcXn{h#=6 zD#|CL#9x6&=350JECvij1PB%P;jZ&Oh8IwUD&Fiz{O`FZDQL(YX`rxTzCZea;27kb z!b_+;KxN$H`l)&p)AQ%JaS)k#Tma$K>7ZB6EoHELp!Y4$BKPp3+8%`F8JGH$pH7`5 zPX1q--aRhK`~Uxczu&do-d1dLWh<3!ZMn^*H8f8g)>^sEl5MV7d7yHMhZGGJ0f8-7 zc34um@(e3WW*(?{meWklOc50oPe7hPML^_y{$9O*pWolyZv1g^T^G;S^Lc;XA8Up5 zdX4!@s>h6IOIxI7OZCa#tl97=UYP1Q7TsGDL359ywl(0Pj`f#4!bFIsb?@XVp3^aXQjW ztK;pex0J>0QN3-?mXD<^b{=yKu!CvzVY$ezXIA^CAL~sF>a7~*X58RBPL)J5?xbIM z#fkj$*Arl)emQY2X-O18d)Jp%xP2~Pm&F>L>)VfGKpzC;^qDDl+jWOdX@33PP!}U) zZ3UuM!`c>_Ak~-b!;CrRJa*I_Ka915=us@`9n+=&>IP2}IZ+jU>@!e@>u3@Z`QTOS z;B8_2?9$l6n=IgOOY14m<|dsXCTpd%5}kZxi>L3_jco3g}rsmrrcy05G-IiGkn#y4tjX6jhk z^#&voAl(#?Mwb4zg0TK02)U8RWB+!7Xj2IL-M^8}LO)c${koJyKCRIrbNq6)-S_6&~-)6$`Z2lzvCK;|dI{*=KsDQ=Bmyc61jsB~U<9>DDKMa5+5hs`gy&W@J-yOmRe0d+{PMx@E>({4~^_psc4ESLpu zlzsnd`S|I=<=23|!-qLbKofHU^3?JhaOkuip61XWXU{30(JLiknn?hjqSG?zeherH zLFF@{#R{}G&V%h9+TA5LJezcT1__#7@-R)N3h9EY4J7SBZk49QwFxBqC0uFCQe@_Q zR-j!kP?l5^ARTunN}YRowzZeHuUoKd*7VcaF5FJR!m9vwSdKrPuK&?pF52l8zn2ho zK}&DzxeB@$A838KSmV1Dq3D~=FDEbFj7X2ywS|Han1?7E$WSc`pU~N=I&USbKw8NP6bu7nhqJmEJkYWE3Z6W4e<5_|)#NgnyO6hI zyjhKO8@s4ouSKr%Qi#v<{o=%|&($Yu9VlDeqP;-UuS?@14E3Hm(qM5dixb$Hx1LA;9*EjqFDdhh=9NR%d9K?j!wo0N`Zx~2rPJ~ITVT(_%e z0rZSHa)#2K*WO)J3@%3~8#ObB(H$p2GFW(2nzc((!PfM^1hyVF#IQC891rI20sY## z`TSalLUdlc0&+)Z8F7Lb-pV4IMN#z+(z8j|#hjIo*esIH56_~mKb*e*h_m`pz!Xe7 za@~dd;ZA`Eo#25467v6cLNP5y~smtYAjo!{jL85yNC^Ek+Oc 
zWdF=5+w7P2H$=b}R$n|X0L8F1(@uc zJ7VP_K2Gc=m;&OyBs`(c?H`|gY#I&9-iB>yEAxpk#|l(_13JeuNO)k zJLTkw6oi#om=fhGJ!v*`BL;lNm*@I4x=8BxCNSMOdZRd}2R_-P{JJ(Nr)@P0oMxUd zsC5at|u$|hjY^}p#d6$7GEuB?XH5%aL<+ysQEdJ z{1iIl!CWlZ(LS2?K(nvGk;W{ps2Xh6Pmm-~5Z;~xNI?)a(e4s}v_du(^SoNsQxDm9 zLQs(-+%(+d$7A7SX`+)ol96L7a+R>+VAT4hY9d}JWM`EU=Ucn$|!Sqn)KRQI+<68X!kSIx`PbNo!GX>C5G zjemeOE@{lWxebfQy?8YME2|zlFY6kA`x%F@G}N)bs2B5pHFd5*O`T}~UTe{DvCtjH zqTEvLEbI&|Q37HJ31e$*T_tpDCCfE*Rjvw12#^cOR;=krfm#* zN)au&2myjX62c`hKoUsc{CbuI$K(yjVda91U%p`Xn3$Q79vKW=h$nU=gVK}{VA$j4h$mn zb#9dpTd1(*?z(IpYzxq}&S$e{LvmQC45wsQ&IJVp9c&zE8_vgS;o2U)VCfkh2)=I| zr_ad5&Uq1@EmZt0)x{)&VIcr7OSHRq=9M+Nk|bs{p{Hmbir9C^&{92@T+z74Z zIzJj93+*8iXdg8n8#$<2S>&>v+o^~xV2j3fj+hvjcW%7Z4L+{JkiGTBgQ;@QR@P4M z&Pgx_>@Q-Kc_;45B|(oWQl0t&ny7CYM+F*RcF@Y_w~V#Tj1MUFeDffcxWliEb-bJq z*u2iYiT;*wYWLgFp=Cmf0xuhB;Qh|@7?pkeX}5NSZRgmWtnj_Xn-GK2SSF)D=mZM+ zdGht9gSx&(l9tJVCE-m8?XrPe_0iS|)w0grULeIF3}@~bC54G1nRu%N%m#LFaCLCp zZi{*V36gd2v(bVyLp6Og!lZhkd($<+xRA~X#@V@8f z?)C-s-jL(`n81J)>}iLkH+7bDiJ((#HnXNr4o`!_S~FcZE`by~5j0#$@z$}V{rSbc zVmVJc=e8$~NsX}{J4%@=gpRaBL?5OcO;G3!;3Nx~Yfe)eag4Ctc|3vSvu5#XNuJ0< zrNluNr=m@sxaux%(fnxqKucTAAie{-K|f-W*b1}aW#m19S`c6_l0#HZPVNm+dQ*(r z0nbkdI};vA)0f15e;tFnVonU{@{T6tRV^{)l1mO2kFHu2qsdoE-;DG3nA0Y*?(Zt2-SHoiP zKKnMZUFPVfaYikH5LRfN%!I!3<(p18?7xp3Q4sd+6QPgMDZq(j92rXt&)wo&XTav&T zR&u|_{Q1F|@bvhPaQyG$=Qz4@=KFD*@PfS5t6|;$keFEI-_Wmf`59Ey>3Hi|er6ZBehaqdOnp_E|;?=&8|IyP}h%^?&g`PiQpTQUgQ{ z(xF^(F)48)i7W&8l?agKg6@|RtcF6A>SY7mL`pjch-tA?i;@?<|Zj0S!IGIU|@3E<2CGU@XeC@lu;(1D*7PIp~2-JyT& zjdpJmLeFa1*tc;s|D0`dGlz8o^*5VuG*YE+mz=JhXLz&0Nz7xbic#%a6u=C_F<2`p93O_|-~uQEv9iw*4;)1oooPLZ%dUdyOenl_ zbIdCk26yDVVyx6Xz=8=g?a+dr?$!4mo{bn#_uc2pD4=plsuWMjoXux({K$aZTXBv| zXB+47N;ydCJAjxNcNeUA&NCg?wL!Ye$<2_2U6fsA^Ho7+FT_Yic^WAxO$$t}8@|50 z@F?IsD5V9ORwSXK;f;F>v|6aKw&AAY-s?XPv)Aa_+{gQ27m;`kZmUDUJs&VU!^$b1 z_jm89yMxz09oT>BPxW(qy_v{9uaj*s^MAe<@&ezs-2c=clQ!5HOK!EMIA>g@K=qz` zhf+r>^2}0bMqW3LVOpGB!Z1Uk;;r4v%B%yy^RLmUkmNdSZZnJ~3%-k4fLT%Dtn9qC z0nrA4FIX4y!tg}g?7fv#$%R65j%p`_3wM)4F`b3>$f;lTWLEtvt!Z@k zmg%snzMJ3eK1%3`9%9u~jB=SGg@Uww+P8t9Mc28EP5$IDHB0*YVO;wGHLTfN27$(J z!iV(qR`v+-f#DK)#KPQx;j$rx5`RcDP?oV1XNqj ztEtPQsv`g&tF&hVpIfBqPP8`$2|vjo$AMUizLM?P2hP{^IimV9 z?m!_xO5-YtaS?jee zs**r}C8d2=uL%VzS%loBZ;Hpuy5jGLK0-dRKbeebpRIhJlVGgaDP0V=)ZwG4(Fz|W zEFKAVF(8yXV;lJbvGK20ITxTb(5B~LcQUmuAE%eDpn@03mKbf4=&Nfp&GU<4)+;X6 zZ#C`=jRT!u_N Date: Mon, 18 Sep 2017 22:37:36 +0800 Subject: [PATCH 206/335] pass unittest for deprecated decoders --- {model_utils => decoders}/tests/test_decoders.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) rename {model_utils => decoders}/tests/test_decoders.py (93%) diff --git a/model_utils/tests/test_decoders.py b/decoders/tests/test_decoders.py similarity index 93% rename from model_utils/tests/test_decoders.py rename to decoders/tests/test_decoders.py index adf36eefc..d522b5efa 100644 --- a/model_utils/tests/test_decoders.py +++ b/decoders/tests/test_decoders.py @@ -4,7 +4,7 @@ from __future__ import division from __future__ import print_function import unittest -from model_utils import decoder +from decoders import decoders_deprecated as decoder class TestDecoders(unittest.TestCase): @@ -66,16 +66,14 @@ class TestDecoders(unittest.TestCase): beam_result = decoder.ctc_beam_search_decoder( probs_seq=self.probs_seq1, beam_size=self.beam_size, - vocabulary=self.vocab_list, - blank_id=len(self.vocab_list)) + vocabulary=self.vocab_list) self.assertEqual(beam_result[0][1], self.beam_search_result[0]) def test_beam_search_decoder_2(self): beam_result = decoder.ctc_beam_search_decoder( 
From f056744771faad2829993a239f1e0e6fd547bf79 Mon Sep 17 00:00:00 2001
From: Xinghai Sun
Date: Tue, 19 Sep 2017 00:02:10 +0800
Subject: [PATCH 207/335] Update librispeech model url.

---
 models/librispeech/download_model.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh
index 336502de8..7c46c0991 100644
--- a/models/librispeech/download_model.sh
+++ b/models/librispeech/download_model.sh
@@ -2,8 +2,8 @@

 source ../../utils/utility.sh

-URL='http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae'
-MD5=ea5024a457a91179472f6dfee60e053d
+URL='http://cloud.dlnel.org/filepub/?uuid=8e3cf742-2ff3-41ce-a49d-f6158cc06a23'
+MD5=2ef08f8b608a7c555592161fc14d81a6

 TARGET=./librispeech_model.tar.gz
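Since this patch only swaps the download URL and its MD5 checksum, the sketch below shows one way to verify a fetched archive against the new hash with the standard library. It is not code from the repository; the target path matches the `TARGET` variable in `download_model.sh`, and the checksum is the value set in this patch.

```python
# Hedged sketch (not from the repository): verify a downloaded model archive
# against the MD5 recorded in download_model.sh after this patch.
import hashlib

EXPECTED_MD5 = "2ef08f8b608a7c555592161fc14d81a6"  # value set by this patch
TARGET = "./librispeech_model.tar.gz"              # TARGET path from the script

def md5_of(path, chunk_size=1 << 20):
    """Compute the MD5 hex digest of a file, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

if md5_of(TARGET) != EXPECTED_MD5:
    raise RuntimeError("MD5 checksum failed for %s" % TARGET)
print("Checksum OK")
```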
From d1420d121ef7e144864109c50a602ca5b1e5afd0 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 19 Sep 2017 10:48:28 +0800
Subject: [PATCH 208/335] Extract common utility functions.

---
 data/librispeech/librispeech.py | 29 +----------------------------
 data_utils/utility.py           | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py
index d963a7d53..a485904a7 100644
--- a/data/librispeech/librispeech.py
+++ b/data/librispeech/librispeech.py
@@ -12,12 +12,12 @@ from __future__ import print_function
 import distutils.util
 import os
 import sys
-import tarfile
 import argparse
 import soundfile
 import json
 import codecs
 from paddle.v2.dataset.common import md5file
+from data_utils.utility import download, unpack

 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

@@ -59,33 +59,6 @@ parser.add_argument(
 args = parser.parse_args()


-def download(url, md5sum, target_dir):
-    """
-    Download file from url to target_dir, and check md5sum.
-    """
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
-    filepath = os.path.join(target_dir, url.split("/")[-1])
-    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
-        print("Downloading %s ..." % url)
-        os.system("wget -c " + url + " -P " + target_dir)
-        print("\nMD5 Chesksum %s ..." % filepath)
-        if not md5file(filepath) == md5sum:
-            raise RuntimeError("MD5 checksum failed.")
-    else:
-        print("File exists, skip downloading. (%s)" % filepath)
-    return filepath
-
-
-def unpack(filepath, target_dir):
-    """
-    Unpack the file to the target_dir.
-    """
-    print("Unpacking %s ..." % filepath)
-    tar = tarfile.open(filepath)
-    tar.extractall(target_dir)
-    tar.close()
-
-
 def create_manifest(data_dir, manifest_path):
     """
     Create a manifest json file summarizing the data set, with each line
diff --git a/data_utils/utility.py b/data_utils/utility.py
index f970ff55a..e1e3b55e7 100644
--- a/data_utils/utility.py
+++ b/data_utils/utility.py
@@ -5,6 +5,8 @@ from __future__ import print_function

 import json
 import codecs
+import os
+import tarfile


 def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
@@ -33,3 +35,28 @@
                 json_data["duration"] >= min_duration):
             manifest.append(json_data)
     return manifest
+
+
+def download(url, md5sum, target_dir):
+    """Download file from url to target_dir, and check md5sum."""
+    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    filepath = os.path.join(target_dir, url.split("/")[-1])
+    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
+        print("Downloading %s ..." % url)
+        os.system("wget -c " + url + " -P " + target_dir)
+        print("\nMD5 Chesksum %s ..." % filepath)
+        if not md5file(filepath) == md5sum:
+            raise RuntimeError("MD5 checksum failed.")
+    else:
+        print("File exists, skip downloading. (%s)" % filepath)
+    return filepath
+
+
+def unpack(filepath, target_dir, rm_tar=False):
+    """Unpack the file to the target_dir."""
+    print("Unpacking %s ..." % filepath)
+    tar = tarfile.open(filepath)
+    tar.extractall(target_dir)
+    tar.close()
+    if rm_tar == True:
+        os.remove(filepath)
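After this refactor, dataset scripts are expected to import the shared helpers instead of defining their own. The snippet below is a minimal sketch of such a caller, not code from the repository: the URL and MD5 are placeholders, and it assumes `data_utils.utility` is importable (note that `download` also relies on `md5file` from `paddle.v2.dataset.common` being available inside that module).

```python
# Hedged sketch (not part of the patch): how a dataset preparation script can
# reuse the helpers extracted into data_utils/utility.py. URL and MD5 below
# are placeholders, not real dataset values.
import os

from data_utils.utility import download, unpack

DATA_URL = "http://example.com/some-dataset.tar.gz"   # placeholder URL
DATA_MD5 = "0123456789abcdef0123456789abcdef"          # placeholder checksum
TARGET_DIR = os.path.expanduser("~/.cache/paddle/dataset/speech/tmp")

# Fetch the archive (skipped if it already exists with a matching checksum),
# then extract it next to the download and remove the tarball afterwards.
filepath = download(DATA_URL, DATA_MD5, TARGET_DIR)
unpack(filepath, TARGET_DIR, rm_tar=True)
```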
From 27d6cf90d1fe4f2a84b3b237acba53aeed19c69f Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Tue, 19 Sep 2017 12:19:06 +0800
Subject: [PATCH 209/335] add figure for tuning & enrich the tuning section in doc

---
 README.md                            | 26 +++++++++++++++++---------
 docs/images/tuning_error_surface.png | Bin 0 -> 110461 bytes
 2 files changed, 17 insertions(+), 9 deletions(-)
 create mode 100644 docs/images/tuning_error_surface.png

diff --git a/README.md b/README.md
index 9e9113d84..537eec0b6 100644
--- a/README.md
+++ b/README.md
@@ -273,7 +273,7 @@ or refer to `example/librispeech/run_test.sh`.

 ## Hyper-parameters Tuning

-The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta$ (coefficient for word count scorer) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It would be better to re-tune them on a validation set when the acoustic model is renewed.
+The hyper-parameters $\alpha$ (language model weight) and $\beta$ (word insertion weight) for the [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) often have a significant impact on the decoder's performance. It would be better to re-tune them on the validation set when the acoustic model is renewed.

 `tools/tune.py` performs a 2-D grid search over the hyper-parameter $\alpha$ and $\beta$. You must provide the range of $\alpha$ and $\beta$, as well as the number of their attempts.

@@ -283,12 +283,12 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta
   CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
   python tools/tune.py \
   --trainer_count 8 \
-  --alpha_from 0.1 \
-  --alpha_to 0.36 \
-  --num_alphas 14 \
-  --beta_from 0.05 \
-  --beta_to 1.0 \
-  --num_betas 20
+  --alpha_from 1.0 \
+  --alpha_to 3.2 \
+  --num_alphas 45 \
+  --beta_from 0.1 \
+  --beta_to 0.45 \
+  --num_betas 8
   ```

 - Tuning with CPU:

@@ -296,15 +296,23 @@ The hyper-parameters $\alpha$ (coefficient for language model scorer) and $\beta
   ```bash
   python tools/tune.py --use_gpu False
   ```
+ The grid search will log the WER (word error rate) or CER (character error rate) at each point in the hyper-parameter space and their minima, and draw the error surface optionally. A proper hyper-parameters range should include the global minima of the error surface for WER/CER, as illustrated in the following figure.

-After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance.
+<p align="center">
+<img src="docs/images/tuning_error_surface.png">
+<br/>An example error surface for tuning on the dev-clean set of LibriSpeech
+</p>
+
+Usually, as the figure shows, the variation of the language model weight ($\alpha$) mainly affects the performance of the CTC beam search decoder. A better procedure is to first tune on several data batches (the number can be specified) to find out the proper range of the hyper-parameters, then switch to the whole validation set to carry out an accurate tuning.
+
+After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance. For more help

 ```bash
 python tune.py --help
 ```
 or refer to `example/librispeech/run_tune.sh`.
-TODO: add figure.
+

 ## Distributed Cloud Training
diff --git a/docs/images/tuning_error_surface.png b/docs/images/tuning_error_surface.png
new file mode 100644
index 0000000000000000000000000000000000000000..2204cee2f5204d1d2d2e53fab8cdd0a1cb9ac47d
GIT binary patch
(binary image data omitted)
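To make the grid search described in this README change concrete, the sketch below enumerates the same kind of $(\alpha, \beta)$ grid that `tools/tune.py` is given through `--alpha_from/--alpha_to/--num_alphas` and the corresponding `beta` flags. It is not code from the repository; `evaluate_wer` is a stand-in for an actual decode-and-score run on the validation data.

```python
# Hedged sketch: enumerate the (alpha, beta) grid that tools/tune.py sweeps
# and keep the best point. evaluate_wer() is a placeholder for decoding a
# validation batch with the CTC beam search decoder and measuring WER/CER.
import numpy as np

def evaluate_wer(alpha, beta):
    # Placeholder objective: in the real tool this runs the decoder with the
    # given language model weight (alpha) and word insertion weight (beta).
    return abs(alpha - 2.1) + abs(beta - 0.3)

alphas = np.linspace(1.0, 3.2, num=45)   # --alpha_from / --alpha_to / --num_alphas
betas = np.linspace(0.1, 0.45, num=8)    # --beta_from / --beta_to / --num_betas

best = min(
    ((evaluate_wer(a, b), a, b) for a in alphas for b in betas),
    key=lambda t: t[0])
print("best error %.4f at alpha=%.3f, beta=%.3f" % best)
```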
z8m3KWqdy*CFuJBX_Y--(e}JPY=Wp^UZNRuI(D2=QlK@(Yba~I-?IM|nFYL*9y0eBe z5xYV1h{dPvjH>#kdbq7-``#>mJAk7x0ClHKTF4J$%5%4=$(t%FCs6_5GH}AT=*N2Z z%#_qeZ6L@cWpGW7#o9dG5DXTADk6_K_)iarwHN$xKc{{xwwI#b86m}^r8~b1q|{|P z;PG!IjVDHIr&%?9;Kd@ENCUBV4110LK2J5Z)}G4y*g_OsSaiMC8f)Q1aVGJJ8ykbC*|as$ zWqbzRj82=Auy!yi{VpVxwLE2T;;@jkY4p5^96Nn>a_@#11a zCwSQsW5{XJfZBJSF``!2n;q2Cc!j3=!Eu-vVf^gtMx+`QbQMt;;P0SIMOjMgDG%Bp zoqiAQse+J5hCoAx>;o6XZgoB+vp!olo;!i?4?s~Y3)DYAwJQt(XwCbEz&vWe>P7^@ z?}pbud4Dwi226(U-ovTe+5h;?_x?}ieF(gfL9141^7*~+Ir*L%AO-~#eTu*S&sycV zNpv5)Z9(#RT~oyiT_dpitW5&m+Xq09;mg*=E2%j8zXwDkR`5(Wrc45jjO!JG)qI=u z&+F#UY=eWPe6&3_(vTdT3TQ0 zGr5YFIJY3|TZ8_bA2qD4%;GOJ_l&#RZPNi7U7(GR89FXBD?7zb_V znQe|{PP&a69$yidNoO(BqC2=3WV-X+zmpWyW(E4$+Z+8`IZ8B!SF%QTSHd({V%S8f z9D#sa0AqfF(uKhg#6ea@AzcX_Qv^*9+M2K~kFlrlJzhMdcOf}5bxe4_=kf5APGAZY-^$M=U(uTjCRez3Iw;3&)I{f`cuti8b=bmyDT>UR|l zR$iOCxut?jfg^@+zUkVi3mDJCyoB|2<_a~PH*1|w#CT#@lzHS5Hr%vO)_E3ew3?c}{4Q4+KVo9m-{ z(L2Eo^vx<2982*4gUBg?efl0A;>(==(0enwAuk0PjkK*O{)Li!B5ZUu-e+6opd#TE zML$+GT`9^6f5N|BGaX*D!sMjiW#QEino}DcsSoh0nL7fb$XKzkV*X11iHyzm(d5iV zPMqYGXv=(K3rz4xZ)rf-k$2-y?^J_(%#*zb;4h1jzNad{9!&FWI8TuwRgIvxl+&! zx`GX|B>%LhX}wCC$M7A7swUZze)xNkiTB&29D4>c@6~w_dMJ%5HARUojZ{T~;-o4O zK76<#tfXY`>rc{#TFj_D%<~W?hnogS(h7QewO1ajKJ|u>Rwlw*OQ>$js74q=jxY_g zEcv=hY*e$}Dp9Erud8~OXjF`u_&I)QyXh$8Pl&oarL-$*=rK0hv2Dm2jY6O1oArt* zng=E-H8PT{r6HrsrO`LGK)_sJ!)=ovUdqJC4yvf)$eA8Xn^se*59|0vOZZSUr8aq5 z@)g8JnU}v#%|L3>0uh-i9_@K3cu*V1fYFA;3D-aGYIw(7sPutP8qwNbE*OHDC4P-O z(ve8Q>CKPWa^4FtS4^$3#&ym3btqrTzv(t_km-1vn1|xi0ReMY3$?23OIQkVZpH{^ z%Z?cF2A#_c3(N!j9BbSt8R^( zjKh+S9Qvw{I*KKD^dCJp+g`*c-2*8Y36t0jIx1?#ad!4G#HB<(fV;I_Qz?t%M~rMvYi}?qX6|eWkFe2*8t}ZuQlc#V{5B)@W$@Gbedm z|7iM)D|;i|4rWr538Nvklz`ReV>XjO%tSPHd#quhX|-55w-q>;67E!(H-6@z#tgj6 zl*vlnoUSbX)_tL&cQeW((%C(pku0Lx)x}aIMe#&GF}?gMN?gmC#D3j_ z7RkDKTw)&$jorHwu7x;XqDWw~uVq}>(6*>Eb;_^tLHE&v#7@l1mXu>HoHO@YC-EW&J~y=pQ$yqMmq1j$Bd6>$XU`VzRyi%2TVSlh~UIT&lqaEL~}mgbsFjYJqkVu zsiBc1Y(@Ve!%XojV6}E%K)LLRi()9j z>=fy~{XNf3>)7~VISpjc-xhgP91vFBBpYdwWa&p_kj_X%R1{^Xij~xd+i9hzg~75s zBvmg6U(Y0+7JF2jRF#Zp!{7%}oNlCprN%v@P+%B}V51c&(9KCjkw%l}ysBIZMIR&w zwev{~TV>PD5T%EyEyAQ&vnXaz@&_w%H$IB{xrwGJ%@nrkoe0bzI`h~$6{%UH*-Ed$ zH5bS$H`6Ed$b``qL@05CIyme37E}%Ivf*FQh-wdXv?Lc8l*ey~lUo_GFbHo)mcFEj zNP%2;t`uz_M5_?%Dhvf(^(*fF*nVe9+M}?Oj{YfcgUf!~N63VOE|5#Ak+xhzOijUQ$Ah zCgMx#!E=gDW?o!OgViNA3L3Skr?0aj?y!L%Tm%j=!h~k6P~0vCRTF8( zB$9Buwn91)yNv@r)DS4z<(*k)b+^MuHw@7lZou(!gQpN%hiWf?zR_|Ah5u|Ij;Hic z1h4XJ_9I5YPlys?7Qq2XdzLYC?ja<;1fLhO&PqCWGeIql00o)$6s}KLOh+k$-Kc4} z6ifdIgvf9i?tWVL>S+nvs)}=uXP0PgUp7Lt=;Htp*4%Z_>=J2~xz3zpxJ#;x)9)>5 zu5BE@K3{tXH`9FaM)tX$6Iw9*9z-e3~GG^;np#Poym3jqX-B$v0&%Kx|kQbUC|l0t*t ze(`hH3kPVuC8Qf%*O%y*lF|sO=3&SJri6>K4J}Lgna+mBhK)mqImxQ_ zw0+SDrSxjVMc5>J;p%jO{8;s0c0ev@`yAzr16-<<#|eCfCuLAbOc9@^fAI4j;ueib z$7>zT#d6@9{=Iy)zibe4a*@~9AZiejF3`m?tzon$Hc9@jYpZK0+sblw%-Jt7)^-)R zm^N~~)JFMwb|_${kq}MAPa&3(Db^hwjA*8$mz;oO=7Vz>Vm~=oKt5EwRWzDVxW=3c=jW z3_1X;@d8`&fW5ke!?JrBFyvw*LN+%2N2iw;+G0Q%k~uq|0T#OmOVIGw=BgMJYW^m@ z#9OxAfhlaQK9OGL=~@y(P6~EVJxDQ;c1-qfH9@EyMxQ%u_Qf-wZ^ZVW+Syg&EqfKG zELqJevrjq$he4j2s5z8nOt^<*&K*pvSc|A_T76h^)>7WeHnz$7NeELB(;hrrYXOuv z$i%Kpc~dmcD6n)(*CO^;gF))6iGzr0veNBeO#;*5$xZPwr7-{!h#}Czv!RSPqHMIn zCjo}4;qK!$l=8Dp4W7tuw5FJK@(Or?u?^cRs5MYAoReO(r~9)j$dD4L^VXr>iB;~? 
zZfA+D-(Yai52*is4OHkj+ADRXVdD|>rL&Ej<8ELFWm|Cjs9b>X&13{t9Jzq1%@~3M z^1qF@_5+!-QWXFy0n(9p(Ko)Ts%ay5uy_uj;u*A7>3)j1))1QXrEhvteeWpf_#Agt zS6L+ilD}6Vf;YgirW?kgrTNNz_y=XP{|I>2Cot=`Mr#X+rYE*At=MaF`diN;RI9kF z@`Y#Z7{opgIeB4S*_p-0aPTbUnXG)p%R_ZRi<-owN8ZCYio&N;Xo8jrbM9}VVGy7< zO|&n1au^vF8dXb(>&TYoVsL+OZ+<0Opm%o`tY%WK)m?0?LV6h0cJRBt6l!C#&sE4O zl~P+I2Xg2d!I0J_K44bjJ8p^&vNo>rD$SZj!B!5isz-h@eF($S=Odu|LF?P_^^KTo%j63=l2YCom_{wAIb zaXu{6$rpzh1oY>`|Gz9Z-%bF)1HeOs-X8X=YidCMeyjaAdjUGd0)n)^!Ct5M$akMT zZ9QR^r+*FY=TM!OV?1d0{yZ%Np;k#A0f*Lgq-YSg^v3T3cQRSY+AmR5$W{iYg#DXP^zU zo^snW0j5cdPG0EhAqFkO$&!k>yh{0ld(MTGk)P~$-9$K3n9DDAt{ZTs%Fy1s*GCfs z!r#A=AN#kvopK-Wp~da1vN-wLCu$p6exjdEj@#>O6^ZG_7fMV62(;p&X|$fdGP92!ca&XU+VK3s?~ zn|w6*?4KSpJ%P3)3Tyaxw|f5=LL?Po#|RDb!VJuSIt|#YgxR8pGz2zLz(K>x%q$pa zx?KQn?M~-;tJ$-+W%FY}hos}NU}a{xh%$mML_9=^5Kl^dvHI_WircN)Ca8x(He3Db za(_8mpB4gfyw(Hz$-OC(dR4P}~hIYEL>Oo2J20K6GsO z=0cI(+otc_I?MK?(eR?UFH2FoWw_Y-SqY26DQrFi%BoxvJ(EB`VW9p?;n!nT zD-lgTR9b7l*DOVu8Se0O|71O*BL14)Z&`9G!sUqeT>`Wc5We3N_DYf9Z{TU0)8d2D zrByvgla_s^A0{rP3wZAiMPg}IX~mHUj>Nx=#oHr7E85LC;INSs3{}<6pkC6GO_(Vi zGMy6tR(TRoMQSk7Hq?egZ*C}f(kZt*&>SjTKjAe*&kt)?qF7OXy=FT7cj_kmVb%@U zzLG7NV#*&JWI{|a(iuMw;|Z4e163_y|jx!JLZCkfy5^QHF`?tqNnN~F->789y{tw#~rOPzRc1kCC zgYw+3LDvqWfpK!J><4(JT?BY zgLMKo->F{vw!wrZd21v-J(_L_LxvUmUAIh3Sbr#EE+r=lq?;`vo|1gQbbMy%pr+~* z&viR!qz!S8xC7=%tw46IH9*TK9N=K2rKj_Q_F!(ka{+Yx>esP|hsoP$Gs5@KVRrtB z`=t-~QhVQ`4kj$Lj+v^6N9sW~@%YLKFnyaDuO?=@k-BuU6%Xt@8&w<9`4qI7s;SK^?e$ zzQK-96a(|~n6%0{-{-8sNHyT+c8of8u@TF^J;9u_$Hni(2vdDYpF|&Sk=s}t0+KZr z9bzyuny4;UBb-+W6BClyPuOWv4pOfyBgs4NoE|c-$sVdJc@Qp8r%-50wlIr?DKpDr zUqdxO$Bmt}Ev2JV8nOl$iCQQ(x(5fj0Xu~tejNIFdM(d3(Aoi$^|XBYUOO4Cf?M{5 zrC@=d&Tc$g1OzW*^aYdN5Ej?;t1;`r%lqFRuNZSZ-;2k`+^*p?AMNGk77sUGB_hJk zO{M#N$l1X!Kg@Q$;zt2f1&cS6deqjkd?rYj@G$BQu5~y3b+!Umk423)=c>5 zcgYZ}V!U=9Iu;gM+K0WPer>Kt@kEIP1OynsOU}*xSZAVgD#8cY)3#hM9`m-ZUwho8 z5?j!=_9eK2d0s^sTRFw;1z}njtG@wed1Or3NfSsT=tR?LSOsm2PgM6m$Wmg1gR(T$ zV?!>8{K)Z9dhc}q%@+75(i5&YDt)*ysYl1XuV(TlYGZ|a?GuP=guUd0tM};klsU&3 zFr^6;vx_7!LjevfMHLkdmh_IVe=q^B_U-fs?p9$sC~GsBeBDf&VJx1X@!I;w$o7|$e3plwZ?Q4 zp*xZYrat3_)PSPj-u+YaMmv6s{$@#kls-%R_tq}PlF_?2UKqAPX3U3vqIJjPCsOq& zqGP#yvbjo6Tzvih>n~h%dr?yAHCiqfqFsa%&v)HE%T)vgXm7k&!tABeS?|y4J-V1; zJHIi1?BoA^N#o~+vTo~@i2U+% z{d)o<2S>em>?Z4ZB}_~vQ}(=4rNps}7S+K&wrCnl>bWv8^ycNrVOA7p4VJAj1pWpB z9u^Raf{Z?u2TEAI<|%Fhu0SY>a%csgv^3+9(rS3rwhO{X-7H0ZZ@&pF>e z20vhoXRP(CJLa61meNw|!?R`sCum7Cy>(LVdQkkl{Kg3NB7MVY5hP6*EwPQo<_q>C zqY^&2#jY<`d$wd(MH_iSq_}pfZxH794bIWPmy}KBXzEv2h7nAJK6^I|cg40{%Mxk3 zOJM*)%FDY;5CNTdo;-f2*5ptbZ?4R2Xz|f0J!THp>@=dC&a~wZp946j$1DmdQ0*8E ziv>2-HTH{w#^;@Qp_i2cbs(8wyik;cnffe&w-GK16>E!Ca0OOqxGL=5Yt-RnYHY9= zvJ?5alk~J_Q*MmxethkjU7@~PiUgL2LP|x#wCDJi^xv<@DWCc7bmaeXBEZy@F+}(H z!0j+dp!oMuIqVkyb(1`|vM4++#xs4#*(hCC#u|Sj@~0H9d6$n!!HDnGy#89I7F5$tc0j>0F+GhFMfqlW1cXt7we8Pl1FVNMgA4BpUi1;hpokBw z6f~?>EY}p88^a>0iW;g2TN%HOzAdu)lak4!)J>DTU@(3U*hpp!<9V^8mRx1S%rwFo&C zEnr&l_gCOtl#hMp)I+IQ@iKwcbki8WR#sDePS$sFmL^%dUIG=wrrJK4VMPm?i=g#n zx$361Y6}xKkALSO+d4~IuEgmRNR-bEz^|rx?#yfY$?V_O8`etxeqr;Wn`{|R$3zO5 z!o?%?15-Lmo8g6%_uj}(7Stv@Nfi7HsK2id#+AhV=1}=9(|#&XM7XwIG+G=gU89(P zXY-1zYj2a#uwR+I)(O#f$Om-J$?j@?Yim1XdR3|QLebOAQop)h<8Vq8iTAyA(>^Nf0BDD-(rJMNk5*RdtFtWj97FGRu^UAyMMkxDgJwroFKfkX6 ziFJ{lqMH4bdBMMAayn=Jo#O5dW!Gy|(S6wJG3K`>wkl z5{Yp*yV7G$81!uSkaUFpZ%Z) z*SNYUf2A$b!)nM~s7ugCuJJ2pRhOXa=wL~=iU8-B>B}}U+v{+-#$k+pFo%McC|a#G z(_4*b8h|&5NHtH*uyk9aFxL=d8SWhAf`qUK)`k9J-@P z#D^%bz`C8*{J0tQEy0DBRY?cKD+G&pNOJJJX=>S`FRD>}y~@{9Oazuc_R7TAS`MO& z-~Vg%d@H>eX&p;@J*&+3dJcw<&uY<_$S(V(4wAeHV*KvF#ZHNjM-otXue-4)Ac>ne 
z_x?dPVbU$BT(*C0XUh7k#y$rQ2#qh3D{9n3ZWCEQHhx)NN%&pN($}7q71!VXxWic- zQp@Iwaf7>T=4Ddnbg@5M_f1X&D^?QeAvO5go_kh#L@0di=TzCJ$smVW<^h4se8 z1ZHH-KME@1-;{DZ21~XP0_lb!3B9ZoHY2U=9kCw zOqAc#CJjk0vN1o>2Q}agnj`+5kL^E9Ff-KKM@%-U!c>~I=vr&<76h_I!=MCcVL{#~mDEEE7Sc7ry*h6FY8fj>@w+{ZQB+3HDhXB|*iqB*p<}Q70 zYZG)@Zw-xc0hlgsPk)X@-kqvbg-o7fq#lm`V)|z*25~chU14Cs9RuANK-n zfdPLt##rNBb}W2KY?ujK(qsu&A3Ay8tgnN`IJEtT;UkZIzix4s(WMfMN=Fz*Br&O* zpta&vcMy)~m0N%RUSCCebvhNkbd2;V zG{wVTxU9R3ET>`VrIjj2E1#~f@kbvsc2(ktNeOqN;ZafWB{PN(4C8&E4JvNRMoy+` zAi(lNt}4UJvQDqARSP$R%4ZF7N{;979R-sOinWV~=(l zRYpnOs`Jbj=y*80yv-}G1mESx&klAU zDDm@F>!Gt5mtEKh)T;+km#}_GEz!qljo-mZE+q+m{yn0D*Mfr2|20Zy3_Gtqc`r@4 z(dM5(B9QV0ls&|OSf#nm>GP6O`NT{HUQ}b*QAGfVTl6PDm zb?LFl;RM6fm~T1+1|i?KJQ*n=8{q9&XGYW~`jhUMSTFnZN48e~QJ`=2!6mjdq2kG| zh!ro3m8Re;s7LyI9@o|&D*ty$4*TCS9I1cA&R;wct~KlG@I@|1me@#^fBKt{Df=Ye zl1x}L-wnGMg7^pV{RXoX?gy_?Q|2qUD7AQ7V&UetHY?pJX6wrH1EV0Xx!p9#6CIEr z$d^qE!DTm;1x0m?m;o<=r>AE&kA3y^`yZ@=f8Jltuz1}1?v?%5S#7$$byh;3-_APl znGZN?fy92;ckg=u3v)-%uTzPc@Fubh}+^(j(*3Msl$tUmybX19b;D=mMf{%{a`&<3J8ublq_G?# zb>wftVbRlvVrnL)@AE`1mzEPmHC{xQMc{c)R}Pa~WF+@K*L4yWpj38}dgn zUU120o^bY1NgHJnuw%BqF(Stg!C{L(VC6fCi2LUhn!=7&;@46uG}N031t6pMIKAK-fdwPd+Fq-slT9|rx`ma`uY+EME3r5oh-my zjsMQWNMER1SN?!+C5B9W&70#9NxA4t>oJ7)_eSd9m+r*9Z2=5PN&o8TQv6^;Z{}s> z-NQq=bOkG6%IehH8P}&}+V>~;Jrv-Fq22OeS54^7oi@+r5%Z;TyaN}JBJbzoK7!MLOqE`YXSIj+^D`?_ z|4RJm#c(r(k}Ey|4!@zG7Aof>%C00fjZJWMi*-)CQCC}mU&iRMjLt=x28)y4`@MS` zkyY&IXpN|Bbcb)Q#8DD)tG9R0gsC*XEa?%eWK7sXFgmPe2c_ooNaadjK7T?-5kx3QYzGYF6aKiY07@iLMCY@8Yxah_l~D?Mq6SVMGtp!^ZD*@VO%NeN7GY$J%SiZ-0S4n zqFnUMmfAwTWS*ObeCkG`Y8UNXqZfF4kl_rv{+8YGjLfeZ2r9 z9bojEPpGWNwfHqY8C`wIV9{0-77_Jyf)1ew<3RYXNUa%*rR|tqh=+|;SHNU@mw2e6 z_)by+s^uSFY)?Madd)QBcSpW#e2t>IVfRzU->g+Z2`gvw3Zy3yFOPJB{Ir;9Eb&-i z=xH6!_sH0EK^RadH-ZP^WU-t1GWBG6W4#ermvx?G?mI&c$o3q7Y%&oz{SP$08#~z3PW}R z+jjV=w6Rrw25K`p2f1<{Jx4cmrgx}CF1+UWESaG2kIp;~;ILN{0$u+L&WP}9Vt z@-X?tqg>5u>%{$(b27*?Tu?!7A-a|MhXe#QS>gd`TI7 zdv-v;B=~*`=Heau8P+S66s|N2E2}yKu9||Q&N*ly+*5ay$h*JZrYBFYP$Hzdu`GQXl5rC1cbU?IgL?C9mg@5)@@{Nycw(x zrcZxjC35IYv~`0d3RTg2V|x7Hf1{u0xxrJJl_FN=C+p&(dgD6m7!$o0Y7=K_u-IaG z;`nN(+AHE#wSq5$M!Co~Y7*!>uLJ{c#4 z1F)ID@BSB9T3QN(;sUlYV3H413IJ-@N{@8!9io<3Be^D1I#4E%17(63pl=(gHfRCX zVfcBOt&}E!L>etX+DdOJiA8U(7SdTU{NQX~bGqEU#HqDKA4|;KO#x^+B$3f-kOB=AycKM?jMpODOliZ z;Xqab+-aD2kmxUke>C^4#v*r=;}Y8LmbVukAB}>Ci7E<`y}Zcf)O5MV9EG7~_Zwx^ z(>thwJ$aIH25y-nb~uDK9rR8RsMkuBl-g5vvJLLK6#?N^Z|5m!`x}-`L0lvz48$D z3;u!yDDI1A(g{?Qrp$JlW1SXntG;VdN=z{SuFRrKU*w``?Q{+`M_6qmqh%V$ai0P~ zx+FbI9(loor=C?QXXd8eya!& zBMy6s;d=S5i3tgJ7Jmfs?(OYC;i5!Jy3Y5_D#EO?H(v#b46#=7LNAocyo5nPTt^Pv z7_8LG(~fx#^cc8YqlnctDB&zWzz^Q_fhNN~IcSTeK-H~9n5fd>MyP{jEHC$3l|~2; z>P)?8T5cptl%sHLCxaZMg2hlUz%m+su@Z>t#x&clZEW>zn>m)gDMAP$ep>ENXei`Br6zTiKENj9iC4S3mM z=H(K@Uk5$c|DfA^teq)scHvJZx`r?@Tr`&LV-dEhN5l#0meHispMdIj$hx&sWc_(M zf=@XRV)3HEPBDQ2z?+m$Zu;C)_Ef&hd)$H*8JT$hoI*IHwgJA2(QMV!hd; zXaBQ!R21LO1p9qKrE_1qtRs^cN0X0_FDpO)yM8tHFlXZ48D(^f^Om)xCER#D=I21x zkl<^A;NCYeenV^P2(!tfg6Q=p)hi&jv}xn-8w-&fNYZTcy`j_jVSMn$iF|{1Hopee z>F&dlKR~_3pz0YI>;aaUm$e_SL~krLXk8PNBk!Oi?}s-w-Psunu&>-Ez4SpOe;kq3 z$3Ex?1xEc84_(u@r^{b*bFIMOD%$JH{(6>pPSAX(e~|%+G|Y2u*VEk1T*fhTP zT>y+&PczQoYX)<_$Y^|&S*{gleU`I-m6XMggn9tTS-M6}9*vLv!i zfqeHpexR6lCW)kGQ`Tu=1f&`}-j@Td>QO#Lwo5rhR=AkUN@j@PASja^t>GBS1{g$% zOT-R<d~t%HK{fT5%4U0u@kcKAED(4*+COhHY*oB{4SCVHDfIQ-;sv!=Qzrfo~Lv5_9D)>M}%vEDT|>|#*Po!|MnT7WnA z<>mdG&)EO+57sc?1qaBSZ{~IH4rU{2B$)p&gn$J$U+kq{%o3pRF27Ms(ci-H_mae> zlKwp;-5ded^bfy9-+Nz>Tn7Jpe(XUMt^K~?-Fa%1ra=1_$S4SUZg{h~zgav3ey8Tc zZNJe{f#h$3w;Q6R#drU+tk0>z*4P^%x#jE<$oAIz&$X*j9^tqJ1X*od_Qt{4Oy#@@ 
zWsDpR=&@W$-sM1%@uHf32lWUf0yn|I&;;7=EJeD^uR;tlzH>*36ehS6}Bq7 z^$X3$c1K>#I3oIYNcAg9rbSW=jk98KATE5@U%vzPCKx%2akHx>5QRW`@tiRmiLKL^w;E7`|;pK3lrGRIU z*)sisTU6K)74L6);J>6>z5t7U%9J$h#so>zs%A^)ZZbic6&E~zM=tr>M2{@<(*3J| zg{~4tE7u0qXNk)Ft$Mzyh~y-6{cxQ+5OX`4&OP=jC2`QM$K`M!qXAR`r)&*M%xQak zQqrCP#%0ibhrwnC1=0h+pd3XMd+0r%PSg00B@RFn{Kmb2gzz6X3m;X_pUzmFeNo>o z>6U*%zenjG-w>f4Tdz)U=nt=p_0QJjfFzFSv>C!WAZ?$!2$WrJQYoPFXu5&!Q(YxM@0(J5enevrd zPS4<~jkzmUR1xlSz=lSgi@0pDy5`p|2hT*$9i6TF7G|Uc$#-{PCwu1Inc6BGH;^<| z`<-xzM!hmZdc|k2f+CamksPIt_zEb?y6byHhha-ii3y?eiQAx^Qs)&kG#f|C1ecD5 z>-yXHk{K6T`iVxn`*_qm=eouvVJtbuR&c~^b}Vn|`j;TG?AcMzp9?DItxYeWS*aMg z_!UlVwhyb$OOLqI%UJHL1KJZHX zg{Hxqo9|PIdznid#?fy!ZA8u!{#^L!lh=uN%orC8+fHpE?_yu2zUk=beA>%OrB}Up zdI55J-#?!? zEnwGsC%TS>g3Gc@9F!}BuVLWVkB~})W1HyOQWM-Ru_eAcjFNX<*2yD$iC4?$9n(^u zn(~*Vj!;Ljl2PZC;Bhj{Sm>S0l}42lDet$aJ9A_Gl}Ey0%6T&V?Mkh=<3CA1FY_M;=mEcUS1*w01OImY zy;0n+D}msYw-4JJ*}0>6^62I#Fb+6+gZ+31-+Koazn(<^d!I}f-FClQF86SmTwRPO z`0_L=z=Mg8-|BTv1a-gRO%CL=Yco@#6~{kxbjp1-nlb5yh)d&cXj7_U!O zdSQ$xyYv1{Ev2GSu*@5V&S0TC+^F;iGXe`dq(9QmW3es(e&U<Uef2jJzc0x3W@2{Fs+lce62|0L|{KCXtJi^`W3sQEzL=esvm8OL$NSY zK;v}q?aqBn-OvNipw2~@j{(g_s!mh0>ZvcyAV0q2B1ly?Y5N3IiC`xJmb#9kg>CER9=5&CMUvBHaHvF-QS9JKfiBHCHyCp0ye!i&XHg?RdwlG(`Bt;G?-r}o+V z&3!TGKeEH}+mKR^{~}B&=0cHap?*8^b?na1$r-3;0WxyD0=ob)46r%SzW|a<%M&3w zOn9Py3f^6}O$R2vWo|(IXC*!Uo_}aT{=C8lqsxH%Fq;1&9~Bk#0O$sz_XK{b0rDfn z2xAi>**Z&Nv^#ja_yR+=w?W0&7|NR$5^$2Tnn;AXbQA8KY9z&R#s5M>{k*}|u$)!= z`cuAdR~b>HuDye|=2Z*a^x5WP29iDeIj{!^v-_INrK_}SdhVW}fn~|eww!h%B!iXm z(Bu!!hM-%>IwbIYb!+=+WL28)uPNiMu*%}a86?#+A2mN@Racl*ZqWI@?DU$r$5xrd zK_+Ho0P8@lp$d<WN;F_s5d4c{_BxlQ&o z-vO8L+|(RZthsC`)){Qi^jWgz-ETzGFBj%g7O<6{&A2Ptd@OovR^ny&5*aSsBRweA zbsbe-r+ltV^&c+TiP>)PoZG^SK#2SdBw8@7CWHdZo^PM~hu@jq6K^0{3c+Re;K%W| z1RRI9X&lw}y}+c`>MbJ0;<@21MC&ca?L&$WLDWv(=^}UqfJwc@>h#{c2ZFWOe{K&{ zBmqK!zKyaf*BLJ}4t8V|0p$r zesEVER8Ovvi&B`}I-v^6E-@ddx~xsHKi|{_sX$OB2}`po<_e6Z=8%|rdwIqtfdM~rh5W4A$kZ-#S8cO4j>8_AcUHSmjsgS>+NF_` zY{S|uEqYa1>*0#J^oqL2E;=P3xvHQ7289E($6ZXu&{i}n9f8yaJ#|sX;f~PwI@d-? 
z5(8X|L48XG9%S151&k;pyKJ4Fa;|1TQthg6sz7aJV8L0_ZhH}roDwvB?Z6i z0YzQVab^&5h{;2w$u{sd1pFx?hu>ZfeqHu`JvEtrW3pe}wRGpcA(vku-LJ&KkD0;9 zJy%y8Z|0E4$*mXVxnrKUIL-eeDc8c)N11!RR38a?rLrXjX2j(%Y!G!Alt{9l_(k%&` zwnc5z$Zc3iYqot1HJ=B*zR2QGf@wZgUknzr=9KBPu%W~?UEV3F&zqVs_JX=vtkj{h$NZ@*}{iLHxjgK25>Xt!0E|jIKcewBw>=VjS z>|Z~e5`?y75pYH>;oTWUz2bgHd#$cbctKr0HMIn=?B!^6pJk*3E>!QPf~%rnVDiItK;`qBpCR>JeDV8}VNXAGKyK@uZF817a( z(X33~fXChrl@RN9of$0`?r!J(9G*ZmZXE4k126xtppW1b2CKSI=_PLe#{EWlY>kr$ zfM(do@VTjg?plLB0tZC}fv?hl4Hahc{c2KVQ`pXLj&<7tPKF*4_4M^QOb4^z%DY=C zhK@j)+Vo1d6IL)ZZ5khExifySgMs+Jv+6U`gF4k94>z8?RfGZt2Fo=@5M#V%zx}X={mZhGo=ypmNkE4Tg0^Bft#!>#Gq8YSR(0EoS3!GX$yS_ z6OOXWM4C~WFO{+uT$(nx1~u9h$q`Fc$$Ejzstv-(DQKMIPdm(g%ry*t?pj?bDcw6@wXQ*mgNukXikV}M*o z-R2vd_AR>ZJl2D8;%?XknDOIoBp(PPPv#wpLPvS3&OYbOqKR>iU%@KIx0TTSrq@ge zCl^W^vkTZN%F72MepBR|m5B{N30tw^B|oO2#~Ah2zw_20aayIrP^ZnsM9&$?Ed)%6Ji1RA$nYkBdgvL5m|+l_>?9Qy(mz!b2BiV*5%Z zU3D#FdYSYzqs+@tTrF3=x0@O$nY!PEILsldvn&|?LCCs6C+78b&V5;|4$^raHxey8 zxWTp=k1xX-nNRRrakR|gsdFCW zc)9G#cWe&D-|2TDDVXYF=>)$7BjsbKLRPzu@H=t;(u zg)f$zEUS09EYqZ1|VOCe$ zsD_MgfK@dRwOFdc5FuY8M;J+tS_#b}Rog)(En=acHHT&4q|w03(&q+_W@% zO3)QmX}9Cobkiwphq~7U>MrqWGyq1a`Us}PN^YIiJV#4B-m#-$^zl?>R#nSJ;~GD* z=1iq=SF@{TV%ojei7L4MlGp4h(>@;NMYJ+~_L-Hb(P3$A2A_y@*C8oHaH8F!pWyunWY#Q7Y>@~rU-Nkv4o~3x=`V2 z6W}qUe+@5U?6;M#Yb*5G5|$iV(|P}%!*~qts!U((9w$sx#vk|qLrVyy-P;eDo!s^& z~~9jw75 ziQtJdL5pG85ro8L$>U?xlgp$Bf89>Mz7(#g#B)4gJ0*KTF27Qag}Z&?{KVg_38zj7UM5D8$K>ThF!4Qg6Zu3g7Tgga=E6 zl^HjC&RF01;s)G)AEu@?YqP{oaFD~m;CwXk$C9QXaZ8&`bl~&2eRXxUaWV9H0BGtJ z1twxiqw!2un2e|I0|Dt^?>QiK`TBJi$f2akbkGuSeCF#Ibg+!L{J3AbD!;{ICY0tL z?zJ#3;ew@6IAXfftV!x1^0;3+nyC@ck|FYi|7&XE5cjgm zgtcOvt1_~ZO%~_7AKKf^o_(#+3`Mp`ldMoOmtMC6@_*ArVPnJjN9?^K@O}z)>dd(dY*m)$|S5=`!nqWxj@`TBS)v(No;5h)80#XYF=g1X8g=cOPUk z$Slq=W;J(wws2^iSRuB?_I5^N!e=Q_*N(5G3hS{Ba*q)3LgFK38lsbfJ*H`s!AuL$ zf}igbGPu1j_BxqsacJjwD-ZZBVD<4tHkJy+<#|SuQbqaKbw@6{Z{}#nBfEa$0_P0YVfgJq zu;DujmJMp!m|>&0)g*IxjiHsHlQ-jGVlxbq0_S~8O&h8C{>jt~lOqn!DV^_zciAWW53@Kt6sfCMRk&rB#S6`60ulF|EC35>Yxehyig-s z6W$CoiIE83pry-z*9m_uYbM2P19&FO1fuaH~ z{kJT9e6rrUuDZH9*cRcKE>Xs!fPCz&g{E(Q5r&NWpJ(NbX$6f{5GwD?QR5Gv+Z!D@ z&(KFj_tq@hf2p0ArZLz~rdjYKF)wG!(HIpn$fP9$K7uncGD%n)*T7P`vyqlwuMK)T&uYUACIEMT&ueSRutsQFAqnTe>0W;gd_H+5y*dwtHX9@hVU< zr~FeczlVkt13o%6^;vcnj9Pp%@g))QXEREPLZ?9|BIlIA7z4T~~-e z+6!B$?D7<0D*TQa$B+cY#<>qJ9%P3YmQJ0v>EsE<>uVi5=zr7qAfI1{E9??AVyS!@ zG>5NAudg!b_|#)>%R!PSy$6u|=G3JiIK;j+rE}5a}b{J<$5O&Vi#9(ys zq|IPh)FB1P))0GJ27GZnX({Z1!ib3S{?zxwgnALuVO>ubol3lhb@ zF|c>6cjO#uuIJMF4}>3}4@#{iZ)j9G(d|nj8@i!KUK+gr+f8cD`3qm5!qp|b=S`pv zxTt_ZWfo^hxC^^O^_vtI@Z7c5j00P}fr$yURj}7L_7QPC=I?0$7aJucELN!bF~riJ zF67Gx-FI^=LK_`qI-a`-H8yvA3hkOqM)k2o8b~VJZmb+=aa7+~OK)ncAO^Y2L_;J6 zGd_$69+w(DONRI@lpldR%H>@&RM$^?z;BF0%p3ct0w3q!GfrZ5qx~%JJQ8)6vd62- z`GwqIpu-#15sviiwsXiTQ)7{K#C}~aW`nf%O4|k3gQO7N#^9+WNHIHK5kl0bc$`~h z)llSX+o*D)r-NoC{E_*4OKR`By;5~llvVx!DJi1@o^)Fw5>xD_ut|ps46n=sq{x9Q z!E`QMCp}zys+mOeBdQTG=j-iCa{x2dn2u|F!nO_cSY4OoI6b2OeKm~BIRD3>(h5}a z3XK0dUzeAPSY2HWy?^}18B$%F&-@p<{>9{3^;F(~<~Q3veHFT)PK~AWIy`C1F?f_% z{1mp0IE{Bj3_2ufb)a{6%dzJJCHLk78n{H4dV8LNJ>!4D>RCyPY*r+Din8T7L9V^n zeBZt2JjUQxy|T&R?tN;n4UcSRev6*MrK68@dEuv{&RDZQ&T3_AFh=IbQD(0rGB@@* zI`=igX=WF{gZfm!)A(<@R;z~TEsYfjn>B9iR3ItIx|-SgU?nd z8T?y^#2E+mwd%AZJ!sw=HfPOft2#Z7QzLlKo8L~UY}GUNe^QnA<%1>sj?JN9q z0eM@!5sIuMl{3z`!D3)N5O5c;+0uCww~ znvx&P(EK!)I4bH(oDLx#Gn0sO6P2c|q+I^?<9oqwbMS?953;y7L#|DA$FJEb`b&LG zt%zHAv;Bz8R7|R#H2Ix1d~8%R7Tl&<{12k|0PO%pa!3TR9OQx7fJM|;qmhW`p#F6{ zJ$c>$zj+RhY15T6&VvN*-txmVllG0UrNw&(xh?H9l+mEfi{+g6&qY6q{51Er5kiaP zfK?qpcC;YO`rl`Z0sq5)X5hDBCI?!Z+X0Hrbe`iQl>l13b`mtn{#b_-^x4AU_z0J* 
zFpO~moVs1+pa)2pgB@1!H^eF>g#+40NXcavP<9k_5H~|B-edgyR ze#-Klj@Bb$$-8U8g{uorOLJ*YZe>PCik8fj?4OD}BrR6|@(?;zMU1=q4Is^EQWBb{ z+Mu2x*0_1<*gAD-Oh!IQGipryKxy@sIxZOcPmD5>Iin1D{K>98NtA1tc`T@HGkq;M z8~p;Y$jVti$6%zwEI`m-eFV+Ztu zOphX7(DzCIvv1L0oiR$svOohyg>UDlP}2A29`n91l*N1LuU}ex<}M>}=64jO(EBUa z&~@bgF0$Oxr&MpVB-~&XD)e&iS+vkS!HUD zo*JC({|6waR-ASN0OWy3IfiK|3L51K6-?!K7kr&-2gaNmJbcY{Z8H<39OLK>gFZb~ z%Wx5n9GnA@E|}mhJ~T4-qbCe;5G;3SKXf=E$4W#UWi2uLhSo-j25W@|W0NIQx_{nz zwwOxZL~3^0nnH<_2v$W3*G+)U-6uL%B$LI)(^YX0>N2a(BQ9t3=zr$53zN&f{6I5&O?nuXfk~5qkfWD3k zRk__{TV8k&x=OiQ!}w?z(hbeXyM#7YKDEbOGBg9jg-}@BqpjIgT=vbpcb#bM~tRb1EuC$am1|FV0q#G1Z8i6R_G$_!H?6!<*rNW#$hfYH<7r1@I4}ScO$SfFU$wh>A zy3?B_%V|K^Rg*qCBP}c}tP!$@p*MG=Q#EuB^K)7;d2+Z!n3N+9f8$659(MQDeHAJ! z!b%{?W)RZu=N!u#^SxFK&Pq>W-mekpYxYKZ?yZ06wIx5CM9DV|b20md`erFh+d``h zQFMrJmjI{ z+X-a>#Z>fRrWopJaP(@yq17nLYc`>&D0BJhx+{AygF?iSIaS@-%z@6i3qdsOCc~7W zQg=kroQ~ztpIn@h7M6+POH(#y#o#yYnQM9F8om!2aA9eT4Kl|XXj|Ubfrv)@uH>S8 z<2v0$48o9LuMTBMr690?8NC!o@i`Ts2=R~Dh=;l(63!gwQTO)d#4)#T&24Zz5 z3~gW(cE)+19B#fV$CYr&2W^}DAwm(a&p3l@!b>P-QrM)+(JYB177(=_91`sW^ZyQO zO9vI<;7v3^D|x%K@)p7=|C)-~DoZKeIM3xrr>WGkHsK0qIQoEj04`@*_@FFdkYI4b zvG2;Y0tU#^no{RoXMjmeZ=3-QD&5uX`Q(SUgi9N2F z!$tG>1vI-Bh)zP}>HDO-w_uM4V5x!Yq*yw^kb+E!dq# zApQZFvP`QI>j%Hyy&n!uv#)B)spb7u{Rf(8PR*O(-6kFD@3b&WbuRY65!5pjv;>DLP!10b z!Hx34HRHjK7F^IEg&ndJEBp@w6nxZNh#w7x1)jK*O#Pab30>0D`h=W$>7kEua`%#N zF6wsnRxu$t`KgbpL&BZ_pzX}a2lFYGX#A&jK<5G_4I7M@y(hbjTR)SfpOVv#^@9v~?9c{={yqQzWCyrEclyp~JmUex%T zk<>;spJI8qy=kenChlBdfW={cm~(T9SQiS<#6^(a6@`vIee~O<`f;jS4np=8 z>5t8n)!#kd5RHqP50>+NL%xEfI4h_KmGE)g)L|v*hXTVNt7p59;b`2#nkr+-#Kkq_mHm{tK+Ex#w2=oF zN6fs%zE`ss{&W#N^(-voCu@FlH$O4lTI=fp52amlgf7bxw;xF6%48Dj18`IP;3L88 z5$SW1fePHBPaS>BCkN;kj~UL78KjS4XBj9y+^m22xDRC@_Hv{1*EIQT-n@@c`m9F9 zX~oU95gU03cRW8el}z_{bF{@zPHNNE|L}Lwt)0HWnSx|wiS(l*ANnMhg1t$8M4)v?+0C9Bg#8?OhMB-E|gobX_V{c&}S!`DaZ`F}K>gI}L-yvDPQW!qS` zy=B|xvTb{{tYzEQT6Qhl%eCq}-`_dsFX+|teV+UN;JV&dNW5r|(agC~=^%<#Qa7pr z!#Zm@N2?$aUO}7!eE$iq8JG4WPMFl55J36gov*YW>A3EXYE2HHf|!)x#z=~ldhL>^ z@P-eLgnW-`1J)W2y6E62 z0s&sUcC`jbgIrv`v=QWEH6YZsz-|8I5 zRh)Xv!+1=#NZd=?j2a0XMyA=p$hQCf6rNu|kU`1OA&8-8_{+zA50{dL>wVwjL+Lq! 
zHq(PEMr+F*-BAf^KZB7|p@}xxuFERw#YYnIMr)7Tvk2Q`P^lq^JPC?DoWh zAKJ82hbl=wM_bt@Wl`kILs8286SF(yy0GD~kzr4|P6JC#B{etA^P3)Ijg|7|rwqS= z7G1q}@`W{wzzmb%B#)?0lIUIY6J81dkNx#M)yK8hN;DKnLk=P7TIUbkmWGwo=j3Dt z>uQPfDrvg}37A(n|A~HoR;7O}>n*M!824X^@vr+l`dHwa(3m!qtMPcuQ-(uFRE>~q z6l6mvWzi)zHnK>2Op;fjo-xVM(%X{o`wL?G=kgP1jQ+~SMwcmJQ~+6O)02hUr3ZT~ z2q@-z#juPR;=u)}hv)fyezFZ3rymB`3Dzd~2rNHyhgO%J{x(g26Sl)#FS0->wKmD3 z78e%>9k=skirjO*j0r{^+#-NSy!E*Jg(>^+lX#W~uCbJnFh-viW6M>`t3y9Ze+3VyJnxyQXBu{61A`SD; zpiODv!y#&^ozBeab~)kCrC4K02XL9s5PxQ6-G#=)82vlofvyhjVl~pbr!9bmWLCon zzNyMLvR0F1&s4S|`3oPk5$d22nZGCfV7qx`pSA5N_|C-a=pWHpv}xKB@<7|G5*-u- z4gpS@^?25k!KgK>Vs{-lEu9GaX!EfQqcu z5zvkAWNMp#d41}MIO9uliLkJY49BZW-ZzZ5x0i-ipp~s*67UdW9_w0&^OYzJExd1@ ziKL>8xU^lmmbQIz8?UKazOY91#c}QP2x(lQ4|nKm#kN5{PafA%*`peyScIP^J>K03 zG`7*xH9EkK@mu)?hmIz_45KJURZy;jzdXak8C74w!-XmS-0vqnCW1{!9|cOu2sqN~oDbutA&c!b=5TUy##l6Dn(WQ_Hy{-#&0f%*dG2>IW$u3+UB|EafZ z?%)t_yIceC2s}5>3`UQSt(NN_<%=C!M=p+5EZFf3{GZK%m<&gOfcOuVp4$_AQ6oM< z%|u^8tK1;Q?3p50e=Run_umq4g+G_h5Cyl&G^$(R1iW{!qU}mQWNGD4%BFZSBs)p+ z9ij+ddJ?WWGM7mW@6aP_l*Q?w#l~`B2U%j$`Byy^lsX#H#v5f(y2aJcUmL8pMpN^Z zJh|G);d0#sbR|9>pvWzE(~w=l3+>#O9wZCQ&}3aJrzYf<(Q(s981R^q25Kf5oMz3C z>oD;(C55iUPCJIJI<#$#v_$Wi)^;T1r<{}%bui)R>WJbVpjE^MKJmc)Fjq<+(X;Fk z^q1K)R99P5S<(^brKz)Z+Il0t`bR}@#%%YeF2!Oj+?bVzXKZ`hX!*XWEd5*i2GPBM z-B#|K4A>yt_9lkxrXx^w@JCJ=guc-S2~O_;dJw&C?=;{N1gTS2RI!D1^$aQ}xz_}) zn_viMTug&TE$a1dY1h>SmiwE2Av z4M}gKhm!mI3Rgd$;+W(^tcp&3*G1S$dE%NP6|QR6_VV!fDGw z8=reE3)tG^E4@VNXzCas#I60WEo}wf4>z5KSKv432yp4B#?2aRa~>=NQP-jA-@J*X z>J2*s7i^kCxSZDefMZ$QZXf#iRp|Ei7Et{FCp!o@3mDZ1{FOaYeqLlOxU|9H6&SHwMvai5@FgQyR|i&NTG zuCte7x`u&y$s+u1RQF@uTSDZK4C=dDvadeia}=QsJ5PU@>kvgCaEpjvCyf9Lu}J%Y zeiT}*t^c(}0`;|IF82uI7IEwq@rf?ZX{zRKih+KYGsHz!nX@44E~#{gHVZ37=ug2+ zI`slQ_Xn~uetr(==`td_?RK#U-gYy*SqYS)^&EU$nXBR)&H!sjlN-nB5h87NmL0;^ zEhW#z-?#SO)%7+P(3DP367$^GGl3SY4BDM!QN!C0$g&Q~Dhwfp2?D6PS%yzrlY~OE z-#o4mHjsq^b#DX!7V4iS&{-e|;Tc%H69{%m=8aBL%?a3~4gvRx3|;%YeneM7l>NK zLu&m{Mw!A>#dj>aTdOd&HmtrN;@K&(goq&Ii3%rl?(W16(QckKuWUpn2RB6RAy%Z1 z#D_%N`l?dc2X!R%wd4!z7j$kY7g_2p$EoA?)+Gx*?D@{1TIlA3!x!i&QPtHwH; z$3X_xGX{_?+{*aVyE^N%Jaby{qm{n=rlzvZ7Oh2m-4>z^oomkvcZtaTRv`90w6OoJ z?n4>qJ}&-ywrdnN01XNU2T+prKl%ajY@_@8`=xom9#kvbTSu_P08j%+p9|>8V&mig z?x!faSC7IwPq6aZX8!8XN19w}r;CS!thI5f-{{QIQ-QO{IJNmo6noT8JS^H{Be@NRz4O{kgL z=#3nkU8TD9M!D95^XcqhTLa!K^MgrnG>$kGRPGZA#f`+s*k)tpnHkcUX4*6G8?wWXMzZJy{k^B@b$(2lWCm6&o+O9-P z+ZAmlU~l4yF%L6(F3>uGgw;>>90QE4w&To5h%MUtd zZ?8$Ay97Z!pTq)^H$Z6s!l?jN9w77t01PIRhAo?vHn07#tW_kP0H6|JGXSDLSL{jG z!PeoFU!Ruw3>*;`z*s4afd2)6GORGV!dj%aP5USgHN5pN?VYKv!4Ndd;d5B z%b8yCeL6ZnJ2H`$D#{`~0oi73jnPnZ1rBLmLPpE`4d08h2TE{zHs8kIN{Wc^+qXsX zE829^pp#}*5?w^m*-#U${u3E3PCRs0rCAwdsb7Occh|dTdRO>MX1CEPJx0Y|&U1y+Zm1wu>Jz)uL=-t1VnMFs;aK1e_WNxY<`WeY^Kin3gBAAf&5m$1O=F>YF8J$ zyaYZSDNaI1Z?=?u^@j~L>*MrdBkAfQzJbKeNS0@qYI3BYTPeDfahq3W1x)P4|%U5+?#NnvAwA zin{G9nt1iM-HyzN@^~jH1^W2C%X0-k^MPo-R?OzC2K157cbJ+91;Doq%rs84yvlqDNIID|-M<E)zT^LjS^dgmcc4s2Om z3$hJ(`>Qo<$s&eL>RF}-CCKY-Ev5iA8Zb@zubQjt=#T@1WFv6|fQdf+gzm5F)&FY& zPELq!^E%g~bojY!Bj%BnzM8N_BF6xQa} z8GGfWXrd>#-8`7tk|sk?K@8irI=c-8t~&pKA#=5ZU3R8;^UIcC+mlyG@4KwOU?Uqd zSnn@;(kVmRC)>(Tyht$8!Jx)8Y70(N7i}TfI}1vLBjzv0`)lN-k;uHx6CYx(O_?(F zUXy#!c)f3P23DxuPGcXH7gvw5d@LFin>fh1C7~^pi@9+047Cd!j-h%s#wPB^9sWt(osgi@YtW@VtvmZh0^1=-Ta!I72&dA{e1MJ@psipH0hgQmSK1LW*UZ^B$l1pXvhqLqo`zxp=f$1cbbKp zzFxI#$SHmkj^Ye2bersx~iu~F#mliS;*W|Aj5S-?&BM< zXkA6Hm;z~-*kWhScbF%4B_Ep8`vjek5l5ZMg~2d#F}Se63h3<|MKB{a!w}i#20h3)!=f1BuIYC@s_|DJuT42~%P%c8x&~cHVkgjsz*gh3 ztZ_*<4ci&8YTnj04|=y|(ca8kDUjx@u4&pS$jEi1q`JCZvv#_kTG8t9JO8>XUp0W` z5fGqh6b*{n2FU<>>u%k$1X@8-An^ICR<0|;|K9vmEM9H 
z0~M5$r+`%T(ur+mUQHsu>ZRwVR{fZ@tT~Z9aZyzt9Jvb{L!H}A#g}j_;@U&a5@%-) zo!`(uD3lmb84)z3>`b7Cj|%}d9#NQr zGa?BvkE0)U`(9S-T#Gi%vnsWE%(Z7;K#iQitiKD`@BmNHXD+Um%a@OB)c3%xm+CF@ zoUfqq{%ul%Zz$lEs5(0O*kySS28BF=+V9(=>07|+kPXT)we2!w{Nx3wd?rCbv+UoI zKuO(}0S}hXQF%{MBbNV}qoWjkgOBn8a>WYzPET#*TWdO@flf0+8PE*_yI^3T2;ik| z?&cM_bG6*(HBnoZEuX$7tg~pJEzg;E2B$eT!aT`IUEj|nnCI}4$^9EJgbJp!zb@=< z9v`~X%~+S&o@FU!XYG(ky{ydIh$eVi8+|<8pK%N6B^bWJSfpxZEIF4$fp%(QhVq)> zrmfb6*yhg8 z=+&IS_{tYBtQ>(V8^FWYXgU2CWJrJi_n)sd767CRf~+kEfQRTvoa6<~(Q8uI6PPgq zV?vqHm;a3IkiyrHpbWcx{@&aFP5AsaEjW5REj$fS)xi3Im_y?C9g_DTQ2P4LLb>X{ zi>-GikO`O9{Tz4eUVjT33Ou^C=f1k@zS{8sARugA5)avn9B(GN^+X%5bLrRuZWzD< z|9|n~0F8HDRL#J12il|_7la0%wRh6b=>2%@J&JSm-HWGhbhq8WG4w?rWNialk7fNw z0F*$G&iuIXZMq|!8AMJ6X44=rjQsk(^%SZhOQKtdEMTURD`6KGuRdEy3V`sEX7xgn z6Q#D_ebLQUD23flKjDb3B^vGLn6k%FRm==dYZ>REf*O&B|3^$j!;#}oKTElhjPJ z=>gA=*tZo}Lizt24fruTtDNh(3lAiFw*wX!;792KfCJluB=4-CoE8vh1oT6oLM`ts zEMN)Weik0|*?RpG&$e|F@1aDW_5|{Zj@W`UrJ!6&lDA`Na8L#&Ku`G74=Z#XG-3&| zm3sJ&dR(9NzMnDe`CWd?+gr8F6O07w(NU^g1i)R!z%n2z3Le-6aJwS=4PHuv2~Mvo z7k-KZyQEx-0ek702H}x_;{oXR9py7Fq`=b9d|&g2^Qtw@YF*cin=pHiX~HXDVst43 zz)qq|4;Sl*2nJbkj~hcqrL>$m#cT@W!)PX`TbHf5sq<&|GFti^xm|FhReiTCt!sYO z(csoG{dEscE^_|MriTgyq8Q1kYLyX43C~!?XJKpfUQ)2(;>x(wR&uib2v*TwPvO@m z4`>K>f;Jg8S%qNHIcf!JC8RGV{l%o~LY0zF5QYiLUiQ5MFDiB(>#6oX(_jLp6lMy5aw@TXQ-$JPbb zwCb!gw8`p1p-Vw&1Li~xcve-5JARkYp;v{?C^ju4L?BmenHju zWj^n4)c<^x2l$Xd@v&{b&+H)5Myt4w`w^(3#|7*+n)Qq^Wx3uJm8w2iopehp@aFdX z0|Y}3XnhZRXw9zZVoo1#kKgvkl6o9f`0GC$iMc4C(D2iAllXc?Y}Y!zNT=<)xHn>d z`@$3drC!6-Y-}VFPQV?aCcG$|QZ<8(Bsn zUZtxmw%=>DkZKpFrL`Q7Ws6VtR=2H#5>Cg?v(yti(*6xsvNk|OaXBlU)*TAJKjLGe zItmA2VYE6xG5(snUgusL_|bhD>>8s4x8NI6t*oCdaS)AG@j&R$Yn^qW^>cGZ_- zzaUbB2`rcBR2Fg$nL3w7sOru0&tlbJho~f$PhU&lenMQ2RFgvIHeyHVVARqM9-iS@ z4fKJ*MvP(0dOS6PsC|)*N8&e=Z_xJ8$~#{EJg}r=dO~ioHtZHIQ&Q#D*(~Dpj)FWO zU-lPE(-iBK(m#AoMsZtuF4tD$YNPT9PCCZ18C<)`OVFToU?fuD8(bkBn`(X}hh4J4~=&`(O=`8v)bybK9ZXjD~> zB2yZJt83x6NBnW|PGLWZ-p3255*%J6!?QFEln+Omh}s7r=?9nB5y!_E^1u284}@7%4IWto&oGEM*T6sWJ@zH22A4!pQU zY!(8$FtiH~{l^-#k`3AO6o5jyPeE1dCryDQk#&2g=+P*tVCBx!4TIq1ZN+vSpSXX4EqN!*mbyHyr})!gW0 z*NrT?qBu+P+gV?8NV?q`tgZ-W*);?G*v&-Xopt#c5ntABip2uUlNKc_Y)_5!2b7q2 z1FhHgmO^sc()fBUrf>k!F6{kS9t-{ui`@psx4{dz7CK&t>9>NxJJl2`!Js`Jbre1rvNhXV1$(si>55D~tWP(l@d2 z6I`g8%qpHU&k2J8BfJUIKI@a=D?UEmmsza{m1r~eD3u(;;RDdj_k_}EWHwe_^d*yMKrqERq*bk8G|FuaBQz@=T;vP;!1;x$U? 
zLK+<))!Q_$shYtDnNNbkx^G{Wdvihda#X)#ageZo4%7t1g`!|S###s#>1u+z%S_ey3WP2Cj+-ztct1W|$_dFLs)_ih z16?y_TOdnzw3c=&E_3LSTX5qkjkH9B7-*{y5C~$cIe#buzOC%+e;T*J<<3*paPy{>EvGEbhp*0^3VP$G`3(Ul=u6|c zOwO}gJW&rN1UrvX@Q!-W>;VJDBj*4P9DHp+*<|BKcB8VFIpWe8!j6~0;4~9uB+y`{ zP98`hwCKw5a)AG#Q#p9`-y?SY_vB0r5FDf!epdtQhV?axdXistkKdr2y1n*5q&C3M z`tP*%ogSEVN)vm9f*1hE1t3t@r2l_)(=KM-wx<7qrsv1&*7Wo3(epx?(Bd^1)YCqV z@Hi;>-TyekKhlnC57A?CP4D z&Fz@{wan{oR@%dk-_p{O?o$j1P(|R+^9^w2`&?-O3uB*&UCRdsUTXJu3RHYNfi`zvDu39&*0+Jq*TNp>o30oXM;qNbRLXL z5L!qwITG-cCZh6lbn{rDf3QNwFzCim7AI!~Q(T{Jf}dx_S<74!#n-!cd+`5^Gtw3M zcoJ}7%9K=fHX$bpw_3#Qxg0;R_OW`N8#qUDVe}p6p6+xD*_-2SA z)tJAr{ohn~$UE@Rkfy?6G@;83Iik`i*%eG%GZSkAVlg;*Qo$#&;h^WhYL{NV2Vo2; zJpKN#`F6Q}rKiKd!JptB3KxK?8$P^u6QAnxbrk9LXj`5ZKx7$29VrlGky_HKo&<^(B7fpGX=9XWz1GX${ceS$-kB`{7D@)?^d zpB(S(=Hs63$Qg(stTT z8M|f==A~jOScQm=4zXnJYf|UQLO%}gjF1;|mzxl`80ELP{e@Lx zL>xg?&-?<5s|GB+B;Aa3o=VJt`jFFzGWm(yOQbNdc?-Ts?pjC)3@lG$C=R{AXdV&vH5~VHDoi<&Zl!U2$j+`Rok~AyDU= zWSZdxUL==3jRpPb_Dgum3W|oUR5D0PjZUtr9e?5ygBqR?s#2?2<9le z{3c!Usfr4MlV?G-8>1q8L{Al$s>r@-DMr$(FytTf4;IRRoY2{dfh69LS+7+YL&V-1 zv@JM%nC0y!h_2@heS^oMYQ$A4EnnF>4hrpd`Y{%G6r!cBlZ;J!^vlgav|vwxuYmCL zXThA#9?w@qUdB%`j4UibIGQtInX-dv7WHU65x{@^`|K-T@7o%w@=$Trv$|kM zFsOutRNUfbI>paQ2_;!A3*V2&=r-MJX~c4v^d_z;6^PsDP?f}y;yUYY_I5lx(pu5>d%^0xL-nwW2CD(5ZRu6#a$p_?^-&}yAubOQ7u5H z&Yr?2Y&T~8MCOm2(4%vW7l|*?TI1k^Onqd;L+5#^Xdf@U_WafWV{{s9aXN4*5c-Es zLXz&{q6MgTa99i|^QOP;4grG0LX~n>VPVA*l>&`Q&QPsQ8`Rcy?|Q>?x6K`Yte{ZN zBH+`l-UDs3-9JOLX+qTjqfTHbzq@O)mCg*v;)?zFJK+VmdD|s7vai@e`%695h!DBxCrSxttjRzE(efTDVdYhJv3(**WN5N5_O+@kR*tvev(M6 zX4hc$4ss)-Y^DBesUaj&drK!<+loQ!^;Bnf$(w$B6IO>oaqM!e<*=#-$~aSZ_jDlK z^e<3=yWLYGNz%<8?*RY3cq(~`9h}md>g0{*zj%qMmwWzL3y!52LNQAlFJUP#afp>k zm_smv{~n!zEPCI}+yS6nQTY1RVPejld#a>}d*qsW*>Rzk=y4Q#RAJnnW@U+@re8k3 z?^ds3QDhrEX4qp0io0aHsYt7xz8nQ%;yG^O^;KF1#kAB6BSnEu^p>nc8?#-9org

#sV$13Ux!auoan*k1{`_0m^_X5JAdCWd!2sf?!1P{{ zG_m%wyEvUBecI_-%lzXT%JAQb3Aqqj2xMhB?%}^VHq|X@9fP!@jA?Q!E&3jGbyo<= za>6{3U=AceF&1%S^Eud|)zu5&4tA~y+i@H^EP#3EMx1d`xa&r}G$%Z#lcgkB+QiEB zm#hP&WLHq1={HvB4GL^qZmLNK)+I4w7*nP58qw7?Z!87!F|#F{Vyp_bo|DYO7yNwA z!P11WSFDMkFEM*OD{j->b>eckUUoDQJtPZ~9W^oL!*z@?m=Pj|8T-)<2V=Zcv-3E0 zBwZZ4VeAu83S%{aMcEkLy!gk|oWYKP{F6!J{doxfoHuIv3 zcli-$M%LFAZ%ljbIn9Cbfeqzsgh91ghto_N%!POtF=7$K{`S=yv3xfKy`X1!My5a! zL;Aw4gcZ9Lz7rj&bK8fMcl+G)(K)hR3=#f%I-|^-p{=iffhO)s5#dqcw{D?axd_^X zjGM9l5Dv)Q8%?-+c1IwK95xv;VUv=Q;``&M$>>3G(tU2%!XS6!mshD->#{cz=T=ml zja?ENuqFHXybgK8An)o8WbBH7>($CVy6}_}k&BvVDy~NcmX$ydz>Pq{>t$zcWyz#Q zoqj_{ZeNimLRt4yCqUz?s}li&3;FPXKn5Tjv|cOLBd<3jOc#=Ixh128m=kK0A-aS& zs1bZpyK-P3jnQEt+LS00FvA_McD5dT6UVqe>2n%oe{-Fpyp+BnJmbsnaz&-Th(f=!pnw3Bd49moR8 z{+!-yd{|R(w}sg)6clX+^4_hSJ=8Vf0Fl#8OF)Pv=+*^53W|dFSoK=x@eYF;##z*Z zg9IseRyMqLML4+;4zq)CV>4@OI)gxII7+5RqUu6uefe)fFnIVQU&+G`vsJ^|axI2m_P)B}e={yb-@TK1Xyw%V zlhCK;_=2OXUbz@7lA4m?$ar|G%Jnq%*T?p}yVE$c22)U4(kvHOsvp(Qkk>iibUwd8^TPycW5zPlQP;>7ldO&ku{;I|C`=*lwvQF- z2Gn385PwKUl)c|RbV@RmaM^8rqN0k4i9rsM_?dbkh2lXWrV8vz0gV!1w}2(~G?Lek zy40|fm@C2q{GfB!e6cOFw!%2T%LB}aK!-?LRN|0Vc zNIbzD5J6J{ve1=go$egm*6fl=V72BU(14q)UhQU>BH7vC(Tzf^8>E6j1_kKtu{J>aN?;D+;qd7KIV~iX?lIZsLUqGuny>uKbP5OUEav z*Ix$k5^98a_hrjp@qCvKI9sq5M&{x+vbXaMmpTYNV@#K9xo%&1Lh!C$uF6A#NyKKxTkNsG}Ck7+4V3 z2c;AG+RT;=&EqbfNDmVtPjStx79QT!M;O}rLBjdF%lT2a`MxMME_v|a-~fgfYKbZ$ zn~Xg7$Qdwk*BJKx26kK^{_JJu+=D})N`<8q1@J<~ZCf;IM-Q9q323Dc154{7AE`Uv z4eMc-*4v)djzI>^8ekEIqSE)D=G64O+4viGmE}o;ZaPz6IF!@p>h+drr!B{=o+-d! z)c*1Qs_Tn{JZNNP6 zU!Kb~prkvs2jGI#Aj*TI%)Hf9-*uAHmKT&J%Gb+|OXK96g7Na6b zH;hb#63ag(nvfZBAfTMMz}y(|^9IK#34B66T<N$7h^HDfbkV1gcdZwjF zw_e@qNS(5?RIQsj8I*0_)avMC^H~m~VU-aO6o}CcgJk)DgotQ;^exaT+8-d>7exLB zj#of`6crWqDL8ZbNVaSqu*QYcmaOx(rA*xTpP}e4AAsg(&nzDYy-qs=VLgzru*V?d zq3L-#-knBd)>+3!n{xWxam=0H4)vSTyX<`(kKxJvr`7DQ*LuU5-?{g`X$?UO*u>IOZOoceR zgu7-6&Cs@k??Dp>##J9DzsF@5yL@H+an>WY z3`0@+-M@PJ@gFLIDM@Olq^vA|$Lh~eu}I)UHYrl6Tm%e1K%EVoXp75_e-~pH7uA^! zx+1N!n4*sjj>xb|TyOc2bhNVsu_ry`2(f-6f-(tc0{+r~=v9vIhmM|y-z`zbwz&S= z++}y;dQ2jDf0kgfIY|bbuE1sf1)|O1jBEOq&qmZ80#Km>nWQ;6!yHq{ai*O+!o;do zD_O$Xj>H+pRGBSQ^52pfgJ6HCB>z^3xr`*Nkw$S`q%495qO8OoK4E4;7?>j1P>Ggz zd@io7AJ%Eq_-Q;2Ay*>;bXz~9=_Oq|XtH;`IB-iw>qYd29uc(u1? zM-f3KzWvjiNac;y!9b#elEam=33g^19qxF|G>55p7EQq1 z&iIF0K&w4seNvoUv(5qV3T||HVFJcFA|k51>04liiU&lO2t2GsjaSk!5rW4SZf(jL5MW&3E3# zv+fY1|Ds5}t<`st%A(j#sN>=XE)?c>t#$NBy?7AGz)!$Ik??}oED&u96gCHlg=k|B z(L)@-B}0!FG{ODKwqE*B#tCRL&kFgV*hOuGk#>L|Nr!UDvE>u0YQO%P;rNc1L1abO z&9eO}1Vjpe3><)Pf!O!DKmMihUf`kd*?0H zrf+ZF(1s&>Y2jR*f-`D$E=p$xr3L^*^=xwCPD!xACHveXPAFpjFaNkOJneAOvAXuM zx4A!g*p1cD^0KIrv5rTUL~>l|9B@W!= z<3eh>Cil+Vt-3;KXaib^9mG)7h;+(vv2I02t2;u;j5lsV5jZ5+k)xe!UnPy7&5CW! 
zv$g_U=eOIasn6+o4>Cv_UIE?RV#ms5&42XQlse4kHWSF3?7XQ9 zz()3liCV`A(tVHQRDhpboHStnZJdW4-e;slEgR4~T> z!-_S&4<;uok{E3r-W-DS4MB^IqN!&)JTGm8;QKi*#$>^?S$SEXi(xBO&Y(OM`#%LutsmjO~ zdslmvj2@l@tC}of{=+!wZ@yLo9^{Th$15RIx(C0( z9#4FRCn6d|5Ti&L7B0kQ*)KgSp8^qiEtodDF9jv6Sm9X53GniWNK9zn0Zg)VC7(;a zTqWuuxT>^>HF|DgTy>hsF8TL7q8`wW{=LB;PbTU%Wa-{rN%oCTD8A*Le)JL|>D^M4 zmq*;(+*E%2nf*IZ`@(HrqpDQ?C~}xI)Ll0_JNxA9tf-+Od3SeL@?K}=At3xw*KwsT z2uM~*1JQWRC)00ASp1^i-~B;3Ejl_nxIna&wD1eM#~&}t*cN~5B(y*_>NUZ@dF_rL zW4F1qJ1N~~OVxu8tr0a73B-c<(P;8tzoNhov=6DmlN|$pt)v7xWqb&rq6=Mh?Y3Vx zyIr^b$+wqpIwpqPiX74^1Bb%08Ooc;#7sYed!c4bBTmrIj^%mCx(VwdUPMgI-};0_ z<)ySXho^*u>5Ee2^)f#GDO1|$+;Nm)F&Vt!7VFpe_I;m7J!%SXTNA;xDCw^%9WE%< z11Tz{0XdHbh;maH6?>TdI`eb2N{PdKLXMo=uR=G?<_3C*HSLF4vU#dSMMZ3D9VN12U?Z7U z>R@D^B~N9-Z`N}18P90&;6J&ylX7wdtyiUO*(*IT7Dk$_E5qsN=vFQsB9T3Rr5)fo z$zuPjA>*5_ed_m?2xs$>yD7uCR6Se6ic>t>>0 zo-8Tra&WKVSfXXBw$PLdF}*Cf`75y!DKxwxx9Sf2Fk)a_yct>U(QMjcU3N9&B$Co^Wug}A^nDG@L@ zs{j;mXaR)S?~+zgS|~EbapRQyNMkah&@}OgJdL{Kor(kdIp-(bURyxA&2~tz!}njN z0k(8>ZGsk(6et<+TzY4gd-pE`j$Diy&a|O$t6vTd4_$uq9!WI8a)0{8MI~8HSOH1r zt=h1rtD>TE+~{m%6!`Mu&Cbrg(&crVJkd);LUQ!}dSwaR@B();;u|iLOf=PTii|RF zeYlwAv~5oE85I@uC?rCJHw>lEaD7%1tzniK-rWXkYiqa!1Vul8Mh*-NB8oXG1yR3rxi5D5p#F_qH!c4m0 zy+c;53YLpQ_Xb70E3Zelfw~Z_yen6ha5BZKNLoMG*CcIuoqSce?bWFL)KE|y6$V2q z@&eBbOeedusP+RNwS)`x$*H8B9=PA^x*Xnt=l4sJutTo~8Z!bVBBY~(ugic1@XZR+ zmGWB3$-+M_tHP=*9CO7CW7zD`(bDGE)v*F_ktxQlFcG-^2onD>;Km!DnAih#!V1A7yxa)wf%{9GF<&joJqL08e!Lw}AK_vZ!AQz#1K? z9A~REIf-%1c|4XVYyBMo{JS?J-#E5VA&?HYxt_YWaki8CvnhkIg)6})pCoo*N8eue!OR{fD7}K! zg?YiElBhM^pr}S4#VUY!nW)pi+L7$6iYE9f#!V|g6YuC&)-u`Cs%h)Xyr)T zs(y6*`kgdbUf_zQSurG&P9mFp?GqyCVbB+F?wH1FHnf}UHkD3AMSIM+*H2{_!v6MZ zHsDfA^0(8xf#ZY{2GR^0F4_i5!%^jDH>t=exwUpyCC~ZQRh}}Tw*WElm{_x?>{jZ? z$OXsbiE5YS#l;~3$M?d*|Iqc;VO?%r*C--L3P`sgDGkyR0)ikd9n#VbA`OxfN-5pa zptOKANQ;1gNOwzjfAjY|-+P_+T-P~&?Y)5w_wQb7%{k^6W6m|Au<;V7MnyGF$kWqP zlB9e%tX(@|M=BJ@Y5Mh87Bl8f!vPMo(?_izr~acyF(OQi7)%<#>$YWkIU?Z6`{Hq) zr;?p(%AXhXzkQ@3r||Oe&F{HuH+u3xEhIiaODl&@9Dy3q!6PRdl9K!(D(Vpzm;dnk zi0CbA)P-bZf=w;i;fXfwNR?>Hf3%e!o(tOI%V5!8FnHQARq4@s-a*A*6r_A>YF-A#% zd&6qm(tBu%Kio-(zWKN>LK*QnEue)=fb4nKaNDay#7V)x>Cue#L3p?~j>FRl=SA1-%8#g{T}&N9y68iBGL zPcjl#j9a9pD<-!ek;s!A1@iqSepDo`fBJi0@@b7wno@E~3KXOZAla+bWv*D zvvF!EKcJ-u@QL^?tCCtgXB|)GGGI%S7XO9DyVX}5 zgDd}u=G&ixs#tbCb8(laxkfAY$ya}QzPcIam@X&^aiAztcW+?L2zb}ry`H;jOYl7eRHG>WhmDoo}{e8X6kR;XDsn{yk0jlt4}$6$)vh zM3c3;x|)!X@aLvS6-oJg!9CB30han{>Tr8CwHJB#p-dG|C#R-<-W8aFhBG{`f8_mB27#?yt35MknKtN*B5 zO)vI(O^KCP{Gs65SSdg5piv3uBq^rBm(o<`ju$p|Fz%B?>C2a@4=RmRO;^|<13MkL%^re; zLlfwtjmIR_0=lBaO8II64AKr(9xP4;%$Nh>HzJ5TmTnIGu9e|}iJ_+yPf`)7!u&gC zruF*o-@hfDoq6CeXT;^o^P*;cZy?yZ74_?v(!}JX1?BF$XI0n60wir^RODWnh0n6H zjfdpM5@RqdNFj4ky3dtJAIiw zM~9tOpWaA(zNcx1Y5SVzm(E7Pr3Pl8UF%i9PVO9OEU9e*8RiobuGlZpLY7D#ZZU7o z^Pm3l4;tdUUy2zX*61UPJWhxd7s-afvVUSXjFI`WE5ZJ+iH>t%mcBVtwzuj);ZYR9gsckwe8uA$s2n?&G=m+?H-P)1AH( z+Ad*jy%Fhae?Y5=K|RFCMD?pM#HA<@)6!BQyKt-}6iu;;vfM~6guZX-*1IP(c>ANb zqdzcskeapAD~m>r1&%FtnDJ0mz8>Njma2Hx=J&L1`@?ol<}_oBRsq)tr&=PbiZFxi z%gT$iJbDRFV(f3dOC{>Nk@DF0W60Fz3Xl>|d>^*Vv7q*F9k`H-Mhh{=EhW1fwsNNs zlbJ~&N&8F0M+e7z5(=RBidpvk0ZEP5a1g`w?mSIn(K}(GrjN>=n#G^sv?2W|H7sSN zUTIgD@tp8&?_k6U#ydPb92^;WYG6RUn(Cr7rQD)KlC^qr!IbkE&d$N{q{Hy&Xz;W> zlc;DKqK~Ixw+*YrpfMTYbJKSnlIH#!f#Ug}l+YKd;QmQz;P%7>Rq<`==$g`w!MSvepzo{XyrYM31r@j#&!-Fd|`E#t12>6Q5wvd|Z6R z7ubrI8h&&kwZJvbrkW!gNgex8J8N)~;_B*Ak4kJ27k}t;(+M8b`A;gB0VR^HvI&uT zE%#76KOF76&u2MIK-=vfM5Vc)^^s*972CU_yQhF1bC*UAGaL_HuFW=9J03IRLfwDB zUiE_pXOzHnsMna)d*4w%jb^zoL#63oR7BpzoGl!jSz1|T<>xakiIu7Rp{rmUV8*Hb zROF5y%8?KO?~)P+4LTiIJUl#9uY9oH4q4LE(FI0EV*e6c#HN=s0S+HT)R+j)rbNIj 
zka;C*fVK%jiln3Gp6m`ssbs`MpF6p*cs&bL|HkH~(AD8^K6q+55vdkJ;?l~mc9CsT z&$veG-(izmIA`lt4ikyr9Q!_wjHpjDJjfrGgQB{sKLka)~+!;Mv?{GcCiX>2L9#cIR)68OF(K(8QCQ0Xco+of7<o_S|kRl=)*;sA;U#4vF% z=2vnVmY7gG^dT zjEi^P?WecLL>A`VJGTi}m{HDOb%rvEF$yUf@nBF&si}w2cmGt|tuyvkVDze!RBL`( z^9+-rzU~E)OgQrCdt`_3Cb>rsEPtj7(a^H)(+}let9~>sU9F;T6|!Myy3`47L$z)F zokFSQKpkcAqVu(TyARustjVpstgKtbqt9QzJ_<0md6r@sE|t(H{qiLh0Cy&DZt5t+ zgoiB$zZCJydE9@0%L7?U8+f+g7Z6Y~A*xVY6bS9khij_za>;+-1pb?C&Zwy1!NkH^ zJrrzQ<3DoCF4(h$!lS84tVGkMTbEHJOiE8wf8$Esr+s%<{!v1D(44ln-k?dRiQMli zm=;ymOK)tGblzTDrkz#)43msKR11PdvTu8t#7ENwE(Yo7xCf4I-KY75`()SwuYv>S zxiTblh*2)*PUI7RauNDdqk4BSI^LEvu2Px%iK)4H$@o^eOYG;rfS4k}dw~l;&L2ZM34;&c_dpkwKTRuwY_mX9s|wqN7u{cHp(z z=G|co2@J-YR(<>SLR(u_Lt`|+ESi&068(5a2CG(x--m3bXgnBdKV#{ctN-;N$;Mhzz8)t5^IWbkNmx<7)Tp(p{wW^xMaegpq1b1I_5AItwE09M7A3o+xv9 zjTvV=D6$JT{^*|_o&4_sAI-dItc>4ZuJDrcd`ol``aAUu`+bBV!e)f`Ybj}ho?7#x zL=J8QJALpt!f<3@#U^=(qR?uiP8%lY<9Fx28BU*>&?{t_XxATEcmZaC2^*iJ^!JDe zidh!~(&~tVFcW&&_!4EW;Ia43jkut9dE*x`RctaOs&TwbwuH^Ye2- z2umIth53KWjN2olINV-;{2woX*YYQ=(+#cW-h`$@2B4LW22V}`?qeUV?AN`$MT}=eVZH22U)x_twtwxEXE}TKTf$L}@=Fb-f&(+>XMTPD| zrKIiNp>pNPeRR*)I;{x_8-;L+THyH;+J_I<6X!<#K4fF>3iQ?Hb?1#{!?l)yFBN{< zv@G%B$b~G|AReXHJ&cjB#w&F5*D@r4GOSR5x@Q{jGR~_5S_);cvpP zU0of=UcT8YRQ7p(vYMdpjKSG-YO;32Bv0`I0%DS*Jjp1%hEJgKr;>XZ=0!->!5I@6 zcwbPW2*`E^eFsC7f30gsS22m%9mZ(VbedY87ZL@OxRy->-r#CVH@Y_(D3}vGeB}P? zQ~ukd-G3LyNHTXA;!9Q5Hq*`>Vm{J$4cZrwJv_ zNjQyC%$u}ytdGnhRl@CfL}R6pC3KGsXWCu>SM(5XK=gzX;c=gZz9G|!oU7BtC>N+y zo%;F4kaqu74(xYQH~>)LbOa>4DF&%__o&n_4LlM4KpCSIj|#7g$;rvN*niT+L|~Vs z_qh>&w1s{<6eKx&eWseuQ{2^V^7c^27s)FMx18P@Ez08OeTmN9lPx8R(YYb@ho;>l zpY?L}u`_d%B2jaQ)#Y|YS9ndwNrw=*u)R8$#r+J zbmRTg!FESs)Oo=7%!rGHIsAveNv1MC&C@|HU0Q#3!#RCMb#~H{=g(>IPT8aX#dp6! zGPr$9(KFQW)@Kwx2Fz6l%63~pxD&hJ>sK9T#ghYLmN>&LFEc>BU%zs6c6D*5^w;Wh z%=q2-uo3rt6bs#RyG4XWC_TS6^cfK@_4R*lj22ro)ZQ(n3=yBT?Jz#&4*RqE2i~K= zH~Z=EAxpEH>#LHN_52IXru?u|#Lm~~fI3MZJTcQ*$&TPKfp4pcZNo*%x6aPaybn@l zm(t$M3^fkuIW$W9`ij9l0Q!!ts0gAj&Nd_d0o1mrQ}zP){Vw-PfGpT~Hc>`)h&l(g zZHH7qm+SU%QSf_BN*o*5?3W4wQF*xd46b&67`S|#hu(5uws^X

g2tn(^#??EF-GxzF#V$zh}~+IqhPnSv>Fd6KK9iL4GSo{0J2lm6w-yO>AZQ&FnIw z2FUL#gZ%duNFTF3s=Kh;j>LZckp#`rmDF+czTQe&*D&U-I#xon zTkT!3;_B>N1y2%?d&}A5Q8Lj}$&fo}jQ9O*qq!2F9Qko~Bg@6^T|5yYo=Ew$lv`qy zn5?m7mK$ZvOMZ4*dJ7;K^TUU-uTg|948+7;Lwo7 zvu9|qc&RSGgw(a!Fa2(AMkXzJo^b!>-g{$`laIl}&Nab4q~(xPCfmyQpkL>bEHYq< zsMTn?6^D$M-CGQGg3uAE0!hk$aG+~Au$?Lv<&H~P4PF59kQw3WC4Q`bwr#794T|2i zlCj6zTispOd%5l3(E@rebI|b5%{vFg%o#6E`H66t#;U zg$0!_UzLf{1T=(uO#~U;>eJdo;hK%-`bV09og^Gm?K)0nOvuiHNdhyQQo)c^|<#3d!Lh`L(yet{7*K!_a zX7nR!sBv@$z}!0ur&U!~$E2o;%$rpd;XTKkx13$FgTk!mipTUbQ?vpS;R>c1ZscJv zww3RnG4?pauSTLQStwAd^b`jC6nUG|GIx)Rp(I+gAlvV4} z-ErB1Xe(ZHvrMkrzr+|?;+xgFM+vQc#P0X^_a7b~ztGivZe_KeGd5)#)UB>oz9$?^ zh9UFp8EndD@yiA1iR(q1;n@npj1dgN%B;y+Uv#BCJkGKU&3?&HO71S4zy-coX=!O@ zuU_r#_i-M;)>Bbcl>j}dX2z2h`hhHnJrAT3`^}-+9)N9n26z8bPFy#7oj4!#3n+j0 zQ&d!BHj`Ke?>ZMSsI1NpzRv$A5reg}S^ z>2IzjEA2tG{GMkcV)fzYt8Ybh1klUTGcd$_{K#u{&Uu-$>MFdxz1<1orzh8_jPn-8 z&eZU4!3lwKAEQ@{wHvRdD)G!c6CepKL~zggF@o>oR(D~1)X+n zeLbtLF6rk_1%8Jah3DbEgyRa5X5YiRUF9evMp1N#`XRu6Oh{0B@uFf5S*9MF?3X^1 zX?v6$Z^t}XK0|Tu-&s?eQDu@mC3ZhBrrsX6EKV?Mv!3!2;e} zcM?%WU?;eO+`;jWjzzg~`yID`f8T;Wq=;(xdQ_uCyF38oe$#SoL6}^?Bw{d)c60&7 z4xM6KKW`QzEA zIHU)jaSHwPd`iR$Wdb}(r&)_Bcb2G=$A2hw2>(!nS2!>C$`Ml)m6uK~cCF6%3q%JE z(WsidDDzFFEQ#-@&Av-R_cGcThvNW8&*IJR`xG&<&q1ftDm$VkiKy$i^|J?G^46w* z&=`BnCr;8K1q?l+HEa~=%y5*}74en~w%sMn$j-UgG6p1Roq#V|XF8%D(u}pDrlycf z=74LjG`|Z}#DUnlvevz!WB;NZB0JoIA%Pz7JdE8 z5-GpZoE&;Q&hP7LJb5U|WuY(h@YPu5?i^$w1m?QPgR}+&rY%l!A`_ zu#zU#x(``c78W8nSKzXEk%yS%Va&D6S;DOYA#Y}V5;itA`NBtd>?AC9p{1iKfgr3@YGw(z)ya@G4RgsXM~+- zce@@;29RK?k+GxEO#}pg3!r@4dYjJYpfdEWk%NImOn}6_QxSao(5KEJMzt5Gp(Q9w z3CxGzdR;weyPXF!mB%I zXzTy}{n$J*Dc9b;xQZ*Ho!5xToj?;LQCFMJ%VRXt$@~7@Q^`q}|M8`LO-y`XS4TWe zq7{YM)aRMVJHh{WId47V@l7Bb&%75J%bVYu)k@#pNATB^{*pSZK?9R+8sGQk$Epe; zscY*y?3uEJ29|4V#_nz{(gXj6J94rmZ1<*IBcdeJG%09N8XFsV1q9-N&{0%;Z~vN^ zdhmQbcs5mOH9#+~fP~V{&W@0n_=S!R+3SmJ_a8ea%6RJ&dhV|26JD0(+G{&I`SAA4 z=Br1Fd_i%Va_XXLI9o{(K};6evN6IZl4B^|-(1~f8!im*B=HG<@=0BFtGREvYCsj!7BvV#VU8W;K(b^|UNItG;|GN-Jbho@J4g z-6&y}Z_?G-Sx{U|hlgHf-k)qm@KDLT4Yj^+*~HcsJAW9XY%TuWA`mY?{CY2H%KM-n z5O#dV^EE%er_li2|nq!cx3y$sWfmH~2R0TfKGFE<0Zl9`+2)+wBv zGVil}ve`(_A-f?wUjUllzI_|?e?)jAJ8e%d_xmNI9(vqz@RN%g zy>b&&A|j%Dlch5pVQ;RZ-k#s?3O17X`oy_?4ZE*l0xM)Ug#|~EQ9FLIOVmVSkAho| z-Z@?#o0?A{kyw?G)Ty-$*ZU5-g6coT3iOHutY{;58s_UFYeM_)xXRaMpC=qUIuP(c~Y;QdlE zvyxm)$@zE74I$7P_#9~)8gB6JfM*&GjTq_e&mcA4?4Yu;6Ru^{gD99;syC@@bs`Dh0gpZO>s z4KkJAMBNb(_Q1A=8y7IoqMb7?hLaZ6|>%-__w#4d#(LW ziWX(3>4F2{Q~}rdQU{6Q$WfCXI)Yn(iAw!2DCRDT;UeN&@p|@2{9PwI*V{X_#}hVE zLQO?Tgpw+tl0hZc^YYC!DvFl?7AB_qdTs(EsR+}c#kzq(7pYvVIXlwbJMF1FUUcjs zBXX_r@x<77q(-1dPC3LH*s7k~Iy^d3%BNSOJPKu)31Z+mus~UnNm9)EO!2tt4 z{R1|i87FZ2Z%#S9djCeERf> z-Lk!Q2%<||lvW0>Q78J{HQcZ&=p1In%o`5=t>fWomy8cjeq0@HOidT5g+v zG0s53BSkKAPKubSl(7WkY29jnKPk@c{(k;Xi+5EG$+V zelshXo9T3#R}M{0fo&YeBHhEhK-#pZiaeo@sKP7R`iy6$_+N~PySrf3(^{+HJTj(z zi*{Fo%wg%(I(?2Hyn9i%j7!uQe zFxgXPenHx}>;6>gG#J;tM7NrSL%C(*Z(*UcSGBwug#N6D;?-JA4c6}2T2m9J-@w|F zm}Hdl({a3C%x#BjXqxBXx}a@fh?9GSx}g~_TCar5K8{pScuz>go|H`3{2}jpU8c#G zxdQ=@hhIOwrR?l7o)x>j)6A)=vA#0Aio0-h?j+brMoQYU?uw6&!j+_OdgTkXJnVe> z$B!vW$0$B+9KUFvcT|DeYI19(YFH`dqmyt-RdOMVy4u~(L0)e;G+}c9Mu+WO?s;Ub z`BlG;=At@!$itvC0a4WZUDWt3!bA>!cndq(|GuQ#eQ*dMpu7Hc%0z38>PXAuRV{^C zPRJQ!^nJ=;tLN9ct4ZZJ;A4!JPNAF+iw%_%STc``_yhz1pe{VX8nJ!r*D%`kwM+5v z^;!gzPROMsYFCTXJX&-rhpbY@+Is%NL0k9kQgdJ{5}yUyX`A>(H&%+ybGf$zeqt>Q z36B3vOtPCJSPVdsf}jdPj9biCq@E;k%t?q05prv4VB*$2_!vEqQ-t!D8^+ZOsz_eQ zv-3NEN(E9D6l4p%*j`OT9`-d9{;jPN2A=B-y}i8%kyQM8Q5@-Sjl=yEemmXWtE6yP z@}j6su$16U{+XQ}`2Kn`yPQJ)!w)>!j{s3BpP%4Kf}R*OZJ(c=9RO3mBV`t|DR#Bl z))Wnj4C|-25^pM+Lth_wXx@VQhl#!| 
z1y&@1S?_}!TD+P`MS1zYpdBk0@8Y5&*m;CRL{7ltI<2O?L6)G%xxBpmf!EC@r;cR$ zPIxXRDX9k>4flOdY2QAf$-a#%iGJtK9dIvo8gZ?0P^2kXme#fp#DW`lvl+TWiw%1p8J`EAQ@PER{b8AH65qj!fjFOe!R2?5?+)16O(K>x~6 z<1#63?kCo5Z@1#85h3Eo!FZs8|Ni?|_Qi|eRYFC&UYU-0x*KnV(+J|v+k<6G7&+;J z$2N1Jmb<>bMtxZ^!t>I%)ZnR}yR89G^P>D%L-suXHJ1VbU^`Lu(7wnw<_Q)4b0sAN zqm`SVA4+22{#M!Vd>Hsc@bJM$$^(MM9>pQY#rqZl1yg&j(L&~x;^zV&;C_nEzH$`tlKYaLrp@H{>OTGEeRZc&G783crI<7W>wdwmxLXkCAco2 zHSudaTeZR){i<@0iC3@khlO$z(pfB()T=JM#Sj>jk}{{L$Xd>K`(bz+$iCetO(&vF z=cA?j7qd5Fu1=pmetZhT6iB<=GreH|`ACc=zu5tPMeweG`M}m<3X%y1Zi`N=Hu492 zPx%(6*tsmNwBIYyMQUanhns6_%y4^57D|*m!lto8KAZRQ0oIA_fxAc?K5CU=S0@R! z@FoM!wc+jl`=ly{*^(`5?}A$C1yAICIU0EUov2xhCvGKWXFq^yiYks_q7b`BBkt8Z z+#%{;r>E`($`yHe=BoLb3eu-s&ubHA!v5yMGy({d$}%!{;IZXjphLQJ999%?0vvLE zHG7kWm?R~?%@=EAjDV>3&`}@)JaPRzIkGNReLgff88L0&WyFPtjv^^9IN#TUWgJ)W z7s_R@he^{n-8Cmk$bVx?TqL|))jIzMoAR$s%VMyaIMrN3AM#JFgrdGi#<(!RMv1uH z3c-KOgc?>=h_*cY;)nFVs}ly(J^KJPE3w)R2D>d{8NjRumC?4RE8SbX zfa&@-E?rb_90AJgkEkh2ca-wA_8_>_uqQPB1FL$`|FKHK=L2t2)~la|MIqKZA7`BR zUzGbKvJXW%3`?<292Kce_`=vM()U|0KF+mtd*jyiEoTD}IW#mRJ!@I2Wu?`unZR)r z|55peF0}fnYH5Yy1@PJYQQ#p{;u`0WF3q;#BJJ+&eIP38)3&FBNw5?c7&zs7vDI*O zJb4LYfaVbc=ZL*CR$><~CTvRgV5si!@81ES(}64g@lHb=LhN(J_Y#q zZ>5E@`UzdD6FxH6b3oNWGP86#d&AjOyV0+bKvvhmbuZ}SQp$G;z*9w)b#|`!DyaWl zBP=-5A%%V3$zviejXWV=i!U-bru2VURwZL&S_sLB{VS-`A;I+C>o2?_4}V*al;)Gq zyclK%)&3#te+crG_NNgTQC+}j--b{Nq)uFFDy8R6!A(flO0c1N$Hi(AUVy-id!U|u zUGQBB{@tCM>m33%W#aIi213DX= z6WoV_qAA%ZLm(f1ehM5Xn5Kbcd7+`v0Rl$h*ROd{8_|y_J;k#|69JBwnS~|1yGM#M zP@VNj_N%WAgO-bv@D{SEXNfV|(5S0!em(O~MTaK_y2OZ=959SfVX>yCdDDiIQ=R^I z&4*w|g9#Y1a4CtM-BuM@6$WEARV=vHbY#uevc9Z^%vO=8dlF4+*K~uA76x_g>;eLa zNAUbMU+WmnGs+Ko_U^92M6M;5j0;L!u~Qm6LjMWdSrjZ~*lrl$ZGrr9_wHRVpZ^BC z4ZOPla*-APa;mCaB?v^k#b)W}v#zm%ibCcWxYCOx z0X0Dl1hp3NC&y8#fo?5j|22)ftGc(G{_Ab$;gN*6s!$?FOfXOMvVQs(G~|nA;2Omu zg&jq!G%W797XbSX8&ys1(Zf5o96?RdEK4Ko(L>Cq<0vI58A&;x0u`_6i^P*u!GK3Zbs8ary5){_d ziAoPLFexnxe+oN2HU&93WZ-O|kf4UR^li2u;YsX}d-VrBs})DlinRE!OV^5|FoBZ;lbo84j>n73s|Ue&0pMkW z7uY>8kY7}UsT2P?A+8$vsl%LcxyB?SCV-a&;zUn>e=fMNtx4h~-+_r)oWr`>r^xst z!4V3=uMS4xMYvmSB`cJA0qUi&v*0Jn%DAkWC3A?8x8n^SE^8MMy%!9UK(b?N?5C{GCBHb?i8>R@V1sm*n(29)TF5yFN(zNK_;yNe)b zkg>4@uC{cv4Ob74T7bJP(W~JYvCp)K%T{zBJXW-dQWg2J%uWv$F9ew7PZ=*Wmipmb zV-e1SZMZ!qQ<3ijQygf~U?;oCB_GZVxzUgd5M?5N<*765l^~45q5^~n*3|^V56Pw? zY5+eMqRZ4BC?rXtrusKKV5WK-5D5pwc~7$Y2P$ z!@Q#)qSCMZSJB7J#+Cytmfhqx0lPJ}z`GDw!dru*N?09{)s-~;kE6>@8($aV`(+5`zu5D~Q5E4pCPA0z|yE!-1 z_dQ#FQr33ypK+1m*QDSqQ&qi(NUM;U<}egt2UB3&N~HN=QbPWVa5gW`eoc`XiZhjs z&K~kl`OKIvU%mvtKx|x`6vAGGNy5jWTv|k-{Hbsqq&wJ;kHo~Ndv{)9u3@}6%G(+q zE&lgwx$$#av_X@%5MT)T{YLmD|6@hgGVox4ap9If->(v+PC?)cAPeg%GNxacT!BBf zl@w00=kx%|8n8jC5(dL7t1YSx>RJxg$^-aO^{FuUNlH+KNe3q2)<*i^2<2xqVS!Xx_D1!&hINX5f?$){g% z%5OL7Smetu>Qqa5{s}s95V`p1NY%jLSCK|B@u+{Nf)(FY)T>v^l3OnV&>=QrWJ~fEg@7>IW?$3A+hoVTAmtz_ME%oi^v(rb z{h^YPJj1(gBE!j{e5K1O))stWdv)L~cK^TJ72ESYO8F7r_W5~Zh|U1M;Seqyc`{X* zD!ekT{QG0Rr3DG0N%}#J<$gSFaTe-#8j3m_YH4=*|4*J~uX!mtrUoG?l+FFDwoXh) zKu9`*u4{M>O=pa7sGU51-&MvPJf=kcE+3D@hiX$fj=&Q;Cu%QWDjFCVsQVchF-J><0yn6!ZK7Jfb6S^8SPjU14{4nlmx;ct0;rw$_3$pn-b zut5_1xI3@W4mosp)9@x4X4eKmfe$TD+~n^gtR=}9pDnK|wr)TceZVrXd4w>+LpMim zZZ6V_0?aKV1arFVmbjj3PNauXjkc5;- zLQ$}#9MJ9J?8?TZO(+{1G-R0rgbA?0h}aY!5dlkf-KJxa##d;4+QXDl=5?}No2K$- zJHhh>ra+a3vb}ZnAXKVjV**)*)?%t269zs16<<>ev=!g7_2uN|DsC_kNrHVs$;<08 zG?AE@nL+YLw5{;>Iu^=Q^rLNM{`4MJiIg#2+bNYE@cM0C{d;=)ecy62o(1XF;^**O z1Qvv0JMh7pRbRZoFJH~Th46&%v5a;F1Rz72&ht(+2-%svxthHV8I8pA!|tlOU%b&? 
zG;Hk!NcZ@N*o~MZHHh|FQm_39Ph(JQ7Z-?tp&5R)Wd#p$ zV{AkkU;xkrfxSv~>LiQHQ?n2jgR14AT2|*LXa&$L7>O59`Ae8}A&@7Q-k3iHd{1$A zSPwr$B{SwEcPhU!el#Y*5!&902#+Aw%OjpbQDC~NfVCZX1Vkfe7WVAp?b^Wo{r!iJ zyb0J0@a0=D;@Mob_(}jZPMf^sOZ~80(BQZuLPJ=d5FgLKDiIo&^n~UKBx}(Af!GYM zsHhOqD#@#mRbAXa{1?3NN`)DpIb2dtkK)+}O5@QY4XBK|Mty_3Dy&%Fxg1SX2}~pRPzxPSzC0FqhlWc55NjhZmFVBGgkc&5ppX;m zjUu+?;GjPg$=|;xUtW4o+o=>&&l5>iv*gMw2XnG^sV&Gv>_8#E}k-a)W+uC=wwm(Aydj)BfLwlm{|C2x!kX1S9$d zFR^Ij9`z>yF)>7`1go3%<9#k5?^UifDguw`X1ryW64PUoJ72z0wzye?lU>rnpw)5wC)M{j`kJEl(1mbuu`0&g3 zLL(rgPGg=Vh?$Bb9FiuAG$f3Q!9L!=y#OC9! z(q@A*2(1x-VBopRtVfF&^e})sn+u_^pM#f-XkdZfl?D;^C3botDDpgdw7Qi>GQb?(HsE*df`FSG_5XNaru{Yh0(D5`JX93iQKHABV2DZe zJzqy?;J_zKoHQZu62fT#XaL#h>ih^Xo|$0V4@&P!xN{97Y7!j6&;b`V1INtV+M)4?_|23P8n|lAA=6HI?KJB99F1aCwqh& z6+00UJ6j@vG1K?=z!^(rq^ig~Ks;Kd1=vFbjsJ3XJt*~?ArJKYK=cO`Na8pUf5*2u z1JFC1x`%9RU^tQ%08D628Yb8toKspFmEzEpgy7E+ljRZfs3UtW%%KX=2S1L;#lK41 zsoEH*@OO(1ufjV!W%dsan5J(aPYbW6dvQSL5e~G+Bj#t%Uakm$n=+AzRTD|#)?ZK< zfJ-Iy)Cabjo?r`w2^VDDSJ%))xeMdYlVBv-0eB`~$=v!oGxMu3KfM>xC~u<71oanu@h7PiTrkarSH6g2Gm-lu=s7#ei0nTONjbV0WI+VCA1dD z{$HULQBbDu?Lp)6KNvj)ZOC80Z?FJI@}ka`9*~^C(l|qlo@7$8QCFpF9bAL7+j3>v`(t#t&(X07X{hbfx1l_yE8F z3M@iFNl8v^t^2Q1)J8%6BO5#Y^z`O=Nt_jITwE6rA@-pz--ipd4iIX3Ug?D_y*ngR zSc)_FLI+}e{D0}2BNf*fR1*CbDO}N~N=iy2#|#qQm4}_P#6mz`tbz+TdAE^W@0;VK zT}_8&1U{QNaYX67_i>*|xly^cBop))%Ru0%V1pthywwODM41hMh;Dz1 zsSW;jP)n9Z^IrfslagvLNnd2%n*pL$s^ud#>j~GmN;C zs%tFRZJcBb4KubW{J@ow(D!N0_&MG^dbu`B-WMPO!|y0~j&!ckjzEq;9Cy7M9Rvjx zU?MmH5HJxYKZK(+(uS^Q?^oS$NEatMju~IlrFgbrw16wp?Sc$$wt4tJBYM`n#8uMhPCV6*ddcO zK-SfV2{fsG*Jr>A0GxFDVNyx(!$0??FrG#tK&U*iCtsJiLDbn)ZI_&3Wl14E9M1D%t{*^lP?puz)v0S~J){3)D+X!9>WgY#cyFaPn^XA| zL^5R{eF27#{Q1Mt!>08gN7W^`yiFlW0;dd^5TS5En~Y98f*#S*cS4=Lxjr#%tfK|j zcL!qd6JmA|zwKW{f$_J|O91F(Xex08i%dCG#{hA0TIixg+5lE`A*#_|q9;>>2L&X2 z|DV=Jum=aC;l_Ua_ycUy0}whfTM+3Aq;g>H>BLW4aii&hgF_xApIGU@twf8fGo>gR+XTU9Rdh~Z4fz~ z3xxC-yxk06ZaV9C*hk>O`Oy0nK&k?3j|c;`;C%xYb`gR=7Ery2q^IxP!$2wG9t|ua zf(mz>^*Mn-H0XFmd7v-C7%>TGn*m$H+=s{8a~s8_D+s$&)lDRZIgRSANFGlu89d5$ zKfu1V(9R6{^&BF$>?R@_5Fn8u>dEVq<_xX?CCA@*27Dq=&eywbEwsG74Q8T;uweT) z6ZHoL1>3ARbF;Tlq)|Rf|Dd@|j7RcBU1hM1n=pJL=ntusV&dm8xY`$)Kl9H^LINuZ z?pSLV$A?7khY~ICb@;2QA`9M=h_5w{HZG3jVxxQ1eBC-f8Y4|;PMUfl6#DsKrSX1q zC|1ISZ?o53Z0r%YQ0F&G&K?QUrJtr*qPRO>U4`J@|KPOP`Oy2oZxjj*lktjIKnBVH zP)~TtD@)$3;IScP6&vIf?+~5Egv5dc;F!72j=E7yn!w<+^MOlGT1%TrQoUU zX|1vv1_|?~_c)rGnfZ2s+04w$8b_zn^XD6f$ZDn;m;F-ZJ7)akV?4b6UDrQl zP((pX1_sOx1C9p^>@&BjrOtGK33&O{)azSjg*KZEEOq{u{d5DHzj6LGFiZ;m+x>9u z9wimkV|8-(Pe7eLpryS90--iA-PxZ%8}@Wf_Hy~0y9PJQKQjOM#MYJ#(k^XWkCB5z zsiQC023$7LS5{U`z*}$%=W%ashz{;XMzYiS36nujt`wuGKO;pmzkmARZDr-7cs88? z1d|}z^tHB@G$bTsYV+E6#aX^E$-3ahpFi6N8Y28X=!i&Jo`tXs11zM_b4eN0XiS}&x=ca88M$C)XT#*f7f0lW_;kIg8Z!ty=*O@1H?TlB4Kml=^#pcviV%b7;lBKk% zDIE)E*;xnzMGI*ljL<`D3ZSpCse})Mtyk`%IM-1c`Wz8K-id%4ya(Xs=*TwQn4&6+ z0cvkxM)AU)I=Z=aZco?OdtWv{_&^-0%}d@_y@a-t-xTM)J4qpg`ukiQX901To}T`< z{tf59fB(+Cw;>R#x%(V#X#5=?H{M_BiS98gC@N}$GDEmgQ%j44CrM#m8}cFu2logF zZo`djNOn>FVV;>t(FIf_BqZ?2Mhic}{? 
zkSTyjK?d10F)=Z-u8s^m!?kaAsK-LS6qn3D*~Hx2otN@HoeyPZW`^lrnD7cOwg#Xg zLY<$VmsL;*0v9dJNt=VI5bzJB4B&LkXn=aArluwpapwXX{4GO6L&yq7higMf6TS8S zCaST}a47xXzrPFaY&2LQI9SJLpSOTn2nr1~h29dd`Lx69x<^5Q<>lqY20l#*35j>_ z-*>Z4RBk^%H{n|3~d7Xy@gXE+h7zsdf_mBO!hPi563SFp zON@a4hb%ZWIEeJ@*|Wr?q}PJ0>-@ILV*-?xU-p-rg+5S=hQE7QrWPxX}96=!=4_H_>>c;xac(7rh6b1$cIwmHvDK;>v@7}#@0dg0qs#*k5 ziUU$0{O)xaUsEH%sE~*R7r%g435JN!2?~nTY-0_3ofNgcSHk-G`scRSOC736 zvg+APw~QzO`?9?$Rm;Oe=()VSF~}cYUKb=B58Fut z)mgS&L!H40KU z2QUPFpKa2kqa!W>fv$4XE-r@|5tuWH29Laj#|Cm!Yexs_-rnBxI+AR%wR>EuHum=R zwXW+g{FDRnkT&Z6l!4X(i4(jpKXT;ajI6ECf1ki~XEagKMjXUyc!Xw_iR&(NcRHRQ zTEUBN>+in{ErHg> z7LBTnWR{BEbs$sD<01na=D42Sxz^i=r-X5{aELW^8I|PaH|rqM;$UZtu_!|z4}SC! zu7Zt;i4lCgL1hQ!@GU{}1a$I>h=`QIv;bedi_XQp%;I7Me*jxZf4=dT z?y{)op=_3hfyL{)8Frt#NTtlp83O_XjcsfqAceF;IXhZ&t`D{oFr;i(C0rbAB*I}K z;O6EQ^t%>GO--#`+#1u+)MUA&(bv(D+gz0`h28KGE)sf{RptVoOaGfU5+FrB0r&h# zlRAs^)2Ap9DBytM!mt+@INvn9Fc{Q;X~1if7#kaq;df021216+eW(ia570eEoJ02` zs(J|`I^0ipX~$SNq3sXmIKk(Z0QBQ~I*$fG1_$1mb-la}99OkG1qnFAHFs1v66jz( zCtt{9_#Bf#o^m?=r=yT2tT8VNy&^E0lm-|8tW#=n-$>{!q!kp@_7ITbMTVpOR1VfhkHxw-u^N9n1W5=D*IaN(F$yZGF~q&UO+Js2jNqai?Pgz5sNsovUy5GU z`oI_7g@vgy-*#E-yqo*+Ep2izkAD!e9IU3Kq$HQ!1TW-m6PUTPdvqjiY;2sba}Nn| zwJh@O+vPAt>}QGqE(p-*=;%*iSPG+3YQe%|nvErh369V$h_Iz8T*rovB0Lapa ziNlaXJ7Ib7h=@?BsHom`m_Tji$c{9r7hV_#7f=8_N)G!cRF*yqWxPegH`Xj`=hA zAifuqvu>WhV2{(r$nHW*flF7Oat010CwfN4BI}|hqDO!RR#sO{*MH|4Ma}~NRKJPC zW5~v)p=EhM9C0gCD(rneJcW}D!z3K%|I^#G$3vO6;n%Ec6q;mPQJHtL6A3%jCg+TC zSZUXxwb~V&-#6(c>zc0@Q++Dd{0NF+8*bgAx!jrPjezcctDpzl zZDUH8>_1w1kfJmbzYy4O+=i1^%z%@|9UOiA`o*6f=UfbmkNFFNek(c`@cs8j*=GKS z5ZBb<5GIzDh1A)laRu{hEgg?AJw!B3Ux^v#qQ%?2ZZEpT`S@+!*{TfqZH=Z3{U#(o ziE0-LHY_9H&liu0S`rgeX%SBJ8ylmiEb!f%65 zm-UWm+v0m!`*-ZVB=?yT)~7@dP3~|n+8{?GRJmsJyIkILaxi6H@$x;Hs2B)Vhc%b! z)C0jOhlV;UH`SqG1bpsN9&a9^6uft0L4iJ?IV}ED*OF-t0XT6CEvWX-QqP zPogpeXvHsLVIYbWZLKY}B_nh3p}A{9b+xp#s7pc1&Q?^gbam6p%gc9EJ{$jwIcJW8 zk58_A40pK)Pn|V8$u-ZMzG0scujcWyM8YC9&e;DzPdWn5JbaNvZ3NQnu$w=OylKrc z#-2=j^vDOY*8t$y=9ZR$8p#TJaAcrD38PM`ar^rE${k0SWw`=Kw!P_|(b0N$a&nSw zY<%V$5t1zOd-t?}=6x=0&EzX%`ET`R;C!~AgpyuxKsh{o(Bp*DPJLutyDYFnv2cZ7 zY;-i+!eZgSAmC8Ec##fLO&1efOLWP(-mn34H8vH5-TV>H7TGJDu=9{Oblr&k1iD~8 zTxM9An@d6A3^iQ*6?%IG21c%}5y}{FtLxNO4eFcSpNIUe8YaSduZ8Q|50WkDs~m|f zK<%CPPg`2la6_wVYRDEGLyR}k+s^#nVc|M{if46{fXvJW#@&zQNRb%RN-gOeA{>T? 
zhip$*-ClpkRor6@7%G4?himswL13f+zNS!5w`UC(cm2cLpd@5@En<}f=x3pNz5XJF zap|nyi6?gij|ocwNLx}?wv^4L)^&0vIE=QNV>^*~0k2L{GeZ}7Y6BWD5d4j&mo4^) zuL;Y2`0yc8N^)?usbH$VcFWjW2z|$k&dYOw5v3TV4OF9^Dv+BvMDMzoy>WSxyP+6g za7n->Vtx^SR~YhI&F$?CsAdg+*7`BwO~mL&2^;x!++J~Ha;ucZ-{@qT0~+(`I>>HJ zH(&ehSqw6!s4Pr6e0VWLc1FoYK z!^0Y2Hc@X#N%3hyk?4Max|x~Tj?FKjiA*H#P*bx2EwpnjBDnBfyj?GRzJ8)bKPjdp z5?h0uHkC_RnbF7GxDIF+0~{Uj!w(($US4x#GT9`Uj>PpjrZ?Q#6MZf7*IHU~fT5v3 z^ypDnqXnyeT`J_im1Jjo?^n#F;E7>i!ck-$mz%YF>&ui^^>giN!xx18T4fQxh3PzRFR%N<{n;&n6`+(c|L4=E^uSs4 zV@cR>Y;`Kct?A_dh63CCe2a+E&TWWMS{E%^gfRk3^49L;SnYXxDJ}TSO^^~kE-uW8 zZ@Gtg7&Kag@U!1g@bVfk`kzquA8*(f%DMyI|vr1--JwS+ME#NO|!BV@3pAG3 zwYnX_Er*MCp{HI@iffT^ZcOThnq2H+;F-r$^N1q* zd_D9klD216moM*!?WNe*?kkU~iyG7Sxft*fr(S+n4MmyfDQ)COX7{4w7;a4hFB$qP zG*eZKD$!JHG#-gc;8vvgmhBi9gSx4#Gy}3pXlM`+Nf8b*aNFtk3`q_LUF;FNxEN@) z=kiY6kASj3`+_HhDBpRL5g*ZlnUKG`rJ>Gkl5kHCBy9=NeQ@XVUup1svd&JubRLNY5j|2J58qbnleV!SQ}71FvtpQTy-cQpVv%U{ zRVYI40b-8nx1N$EBxpcr1RW;vXUM*x%53`h!}y{zXAT&7m6E8&3ImHWG3z6UTWv@7 zug6R{4WLL8-zSOCuZ9O_t?cb3ERSh;TT9D6v;st0di3FUVJZ}~v15?QN=XNX>gvfi zzxyi{3aZe=!7$4e{Ej?GHR8t~Ntp6>sFFM-gsdTVjwhkjUu5L-f@({&ZnzZ|H?BuX zj~IE4qBxU=p3Xn+ub}-ACM#AL8b3&dTZZ}tvDn9m%L!D(!2r-yx%40l!p_x%>@N&G z>>MjG!6*9pX%@LNkr5iX(-R}odUd+!rMCFBN~6v<6<2RaFXCh4Yn)zz`lLE`QNUSz zHcNQ7*b(O{$;Chp?uQqGIai^1u22#t$H!AUrT}%wkiGBBbMx~L(hkW&&@5He)z5Cf z!?CooUTHA6!88YL^iSTO2yV~a2Z~4xlD!F!rEF}|ss9?>)V(9Yja84n?N2F~bsSL{ z1ss5S26AR3d^|wt92eWy~;ju=6RHCO<$YAZ#GUKk>aLU zC)QSDmGQ|j(a3o(2!SAym0LI@tJ@2Og>oHiQ()eGSRNH#tw7E#eM8 Date: Tue, 19 Sep 2017 12:44:38 +0800 Subject: [PATCH 210/335] Update experimental results for DS2. --- README.md | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 9e9113d84..bfc43768a 100644 --- a/README.md +++ b/README.md @@ -426,7 +426,7 @@ python deploy/demo_client.py --help Language | Model Name | Training Data | Training Hours :-----------: | :------------: | :----------: | -------: English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [Internal English Model](to-be-added) | Baidu English Dataset | 8000 h +English | [Internal English Model](to-be-added) | Baidu English Dataset | 8628 h Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h Mandarin | [Internal Mandarin Model](to-be-added) | Baidu Mandarin Dataset | 2917 h @@ -434,30 +434,21 @@ Mandarin | [Internal Mandarin Model](to-be-added) | Baidu Mandarin Dataset | 291 Language Model | Training Data | Token-based | Size | Filter Configuraiton :-------------:| :------------:| :-----: | -----: | -----------------: -[English LM (Median)](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | To Be Added | Word-based | 8.3 GB | To Be Added -[English LM (Big)](to-be-added) | To Be Added | Word-based | X.X GB | To Be Added -[Mandarin LM (Median)](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | To Be Added | Character-based | 2.8 GB | To Be Added -[Mandarin LM (Big)](to-be-added) | To Be Added | Character-based | X.X GB | To Be Added +[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | To Be Added | Word-based | 8.3 GB | To Be Added +[Mandarin LM](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | To Be Added | Character-based | 2.8 GB | 
To Be Added ## Experiments and Benchmarks #### English Model Evaluation (Word Error Rate) Test Set | LibriSpeech Model | Internal English Model -:---------------------: | :---------------: | :-------------------: -LibriSpeech-Test-Clean | 7.9 | X.X -LibriSpeech-Test-Other | X.X | X.X +:---------------------: | ---------------: | -------------------: +LibriSpeech-Test-Clean | 7.96 | X.X +LibriSpeech-Test-Other | 23.87 | X.X VoxForge-Test | X.X | X.X Baidu-English-Test | X.X | X.X -#### English Model Evaluation (Character Error Rate) - -Test Set | LibriSpeech Model | Internal English Model -:---------------------: | :---------------: | :-------------------: -LibriSpeech-Test-Clean | X.X | X.X -LibriSpeech-Test-Other | X.X | X.X -VoxForge-Test | X.X | X.X -Baidu-English-Test | X.X | X.X +(Beam size=2000) #### Mandarin Model Evaluation (Character Error Rate) @@ -468,7 +459,7 @@ Baidu-Mandarin-Test | X.X | X.X #### Acceleration with Multi-GPUs -We compare the training time with 1, 2, 4, 8, 16 Tesla K40m GPUs (with a subset of LibriSpeech samples whose audio durations are between 6.0 and 7.0 seconds). And it shows that a **near-linear** acceleration with multiple GPUs has been achieved. In the following figure, the time (in seconds) used for training is plotted on the blue bars. +We compare the training time with 1, 2, 4, 8, 16 Tesla K40m GPUs (with a subset of LibriSpeech samples whose audio durations are between 6.0 and 7.0 seconds). And it shows that a **near-linear** acceleration with multiple GPUs has been achieved. In the following figure, the time (in seconds) cost for training is printed on the blue bars.
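For reference, the WER and CER figures reported in the tables above are edit-distance-based error rates: the Levenshtein distance between the decoded hypothesis and the reference transcript, normalized by the length of the reference (words for WER, characters for CER). The sketch below is only an illustrative example of how such rates are typically computed; it is not the project's own implementation, and the function names are assumptions.

```python
# Minimal, illustrative WER/CER sketch (not the project's own code;
# function names and structure are assumptions for illustration only).

def _edit_distance(ref, hyp):
    """Levenshtein distance between two sequences (lists or strings)."""
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        curr = [i] + [0] * len(hyp)
        for j, h in enumerate(hyp, 1):
            curr[j] = min(prev[j] + 1,             # deletion
                          curr[j - 1] + 1,         # insertion
                          prev[j - 1] + (r != h))  # substitution (0 if equal)
        prev = curr
    return prev[-1]


def wer(reference, hypothesis):
    """Word error rate: edit distance over the reference word count."""
    ref_words = reference.split()
    return float(_edit_distance(ref_words, hypothesis.split())) / len(ref_words)


def cer(reference, hypothesis):
    """Character error rate: edit distance over the reference character count."""
    return float(_edit_distance(list(reference), list(hypothesis))) / len(reference)


if __name__ == '__main__':
    print(wer("the cat sat on the mat", "the cat sit on mat"))  # 2 errors / 6 words
    print(cer("paddle", "padle"))                                # 1 error / 6 chars
```

WER is computed over whitespace-separated words, which is why it is used for the English models above, while the Mandarin results are reported as CER over individual characters.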
From e9a42044f578084901b86bd1b6fc9bb3dec1d61f Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 13:02:31 +0800 Subject: [PATCH 211/335] Add data preparing for Aishell. --- data/aishell/aishell.py | 109 +++++++++++++++++++++++++++++++ data/librispeech/librispeech.py | 4 +- data_utils/utility.py | 1 + examples/aishell/run_data.sh | 42 ++++++++++++ examples/librispeech/run_data.sh | 2 +- 5 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 data/aishell/aishell.py create mode 100644 examples/aishell/run_data.sh diff --git a/data/aishell/aishell.py b/data/aishell/aishell.py new file mode 100644 index 000000000..17786b5d4 --- /dev/null +++ b/data/aishell/aishell.py @@ -0,0 +1,109 @@ +"""Prepare Aishell mandarin dataset + +Download, unpack and create manifest files. +Manifest file is a json-format file with each line containing the +meta data (i.e. audio filepath, transcript and audio duration) +of each audio file in the data set. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import codecs +import soundfile +import json +import argparse +from data_utils.utility import download, unpack + +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') + +URL_ROOT = 'http://www.openslr.org/resources/33' +DATA_URL = URL_ROOT + '/data_aishell.tgz' +MD5_DATA = '2f494334227864a8a8fec932999db9d8' + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--target_dir", + default=DATA_HOME + "/Aishell", + type=str, + help="Directory to save the dataset. (default: %(default)s)") +parser.add_argument( + "--manifest_prefix", + default="manifest", + type=str, + help="Filepath prefix for output manifests. (default: %(default)s)") +args = parser.parse_args() + + +def create_manifest(data_dir, manifest_path_prefix): + print("Creating manifest %s ..." % manifest_path_prefix) + json_lines = [] + transcript_path = os.path.join(data_dir, 'transcript', + 'aishell_transcript_v0.8.txt') + transcript_dict = {} + for line in codecs.open(transcript_path, 'r', 'utf-8'): + line = line.strip() + if line == '': continue + audio_id, text = line.split(' ', 1) + # remove withespace + text = ''.join(text.split()) + transcript_dict[audio_id] = text + + data_types = ['train', 'dev', 'test'] + for type in data_types: + audio_dir = os.path.join(data_dir, 'wav', type) + for subfolder, _, filelist in sorted(os.walk(audio_dir)): + for fname in filelist: + audio_path = os.path.join(subfolder, fname) + audio_id = fname[:-4] + # if no transcription for audio then skipped + if audio_id not in transcript_dict: + continue + audio_data, samplerate = soundfile.read(audio_path) + duration = float(len(audio_data) / samplerate) + text = transcript_dict[audio_id] + json_lines.append( + json.dumps( + { + 'audio_filepath': audio_path, + 'duration': duration, + 'text': text + }, + ensure_ascii=False)) + manifest_path = manifest_path_prefix + '.' 
+ type + with codecs.open(manifest_path, 'w', 'utf-8') as fout: + for line in json_lines: + fout.write(line + '\n') + + +def prepare_dataset(url, md5sum, target_dir, manifest_path): + """Download, unpack and create manifest file.""" + data_dir = os.path.join(target_dir, 'data_aishell') + if not os.path.exists(data_dir): + filepath = download(url, md5sum, target_dir) + unpack(filepath, target_dir) + # unpack all audio tar files + audio_dir = os.path.join(data_dir, 'wav') + for subfolder, _, filelist in sorted(os.walk(audio_dir)): + for ftar in filelist: + unpack(os.path.join(subfolder, ftar), subfolder, True) + else: + print("Skip downloading and unpacking. Data already exists in %s." % + target_dir) + create_manifest(data_dir, manifest_path) + + +def main(): + if args.target_dir.startswith('~'): + args.target_dir = os.path.expanduser(args.target_dir) + + prepare_dataset( + url=DATA_URL, + md5sum=MD5_DATA, + target_dir=args.target_dir, + manifest_path=args.manifest_prefix) + + +if __name__ == '__main__': + main() diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index 79cc3de87..9a8e1c287 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -16,7 +16,6 @@ import argparse import soundfile import json import codecs -from paddle.v2.dataset.common import md5file from data_utils.utility import download, unpack URL_ROOT = "http://www.openslr.org/resources/12" @@ -104,7 +103,8 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path): def main(): - args.target_dir = os.path.expanduser(args.target_dir) + if args.target_dir.startswith('~'): + args.target_dir = os.path.expanduser(args.target_dir) prepare_dataset( url=URL_TEST_CLEAN, diff --git a/data_utils/utility.py b/data_utils/utility.py index e1e3b55e7..da7b66ef2 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -7,6 +7,7 @@ import json import codecs import os import tarfile +from paddle.v2.dataset.common import md5file def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): diff --git a/examples/aishell/run_data.sh b/examples/aishell/run_data.sh new file mode 100644 index 000000000..db27c5300 --- /dev/null +++ b/examples/aishell/run_data.sh @@ -0,0 +1,42 @@ +#! /usr/bin/env bash + +pushd ../.. > /dev/null + +# download data, generate manifests +PYTHONPATH=.:$PYTHONPATH python data/aishell/aishell.py \ +--manifest_prefix='data/aishell/manifest' \ +--target_dir='~/.cache/paddle/dataset/speech/Aishell' + +if [ $? -ne 0 ]; then + echo "Prepare Aishell failed. Terminated." + exit 1 +fi + + +# build vocabulary +python tools/build_vocab.py \ +--count_threshold=0 \ +--vocab_path='data/aishell/vocab.txt' \ +--manifest_paths='data/aishell/manifest.train' + +if [ $? -ne 0 ]; then + echo "Build vocabulary failed. Terminated." + exit 1 +fi + + +# compute mean and stddev for normalizer +python tools/compute_mean_std.py \ +--manifest_path='data/aishell/manifest.train' \ +--num_samples=2000 \ +--specgram_type='linear' \ +--output_path='data/aishell/mean_std.npz' + +if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." + exit 1 +fi + + +echo "Aishell data preparation done." +exit 0 diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh index bdd5abb58..957416f43 100644 --- a/examples/librispeech/run_data.sh +++ b/examples/librispeech/run_data.sh @@ -3,7 +3,7 @@ pushd ../.. 
> /dev/null # download data, generate manifests -python data/librispeech/librispeech.py \ +PYTHONPATH=.:$PYPYTHONPATH python data/librispeech/librispeech.py \ --manifest_prefix='data/librispeech/manifest' \ --target_dir='~/.cache/paddle/dataset/speech/Libri' \ --full_download='True' From cc3570d406c575bc838e3b8d4383cd6436a60cd3 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 19 Sep 2017 13:03:04 +0800 Subject: [PATCH 212/335] format some writings --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 537eec0b6..d58d0c51f 100644 --- a/README.md +++ b/README.md @@ -296,14 +296,14 @@ The hyper-parameters $\alpha$ (language model weight) and $\beta$ (word insertio ```bash python tools/tune.py --use_gpu False ``` - The grid search will log the WER (word error rate) or CER (character error rate) at each point in the hyper-parameter space and their minima, and draw the error surface optionally. A proper hyper-parameters range should include the global minima of the error surface for WER/CER, as illustrated in the following figure. + The grid search will print the WER (word error rate) or CER (character error rate) at each point in the hyper-parameters space, and draw the error surface optionally. A proper hyper-parameters range should include the global minima of the error surface for WER/CER, as illustrated in the following figure.

[figure: example error surface plot]
An example error surface for tuning on the dev-clean set of LibriSpeech
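For orientation, the sketch below condenses the grid search performed by `tools/tune.py` into a few lines. The `evaluate` function is a toy stand-in for decoding the dev set and averaging the error rate at a given point (not the real beam search decoder), and the ranges mirror the defaults used in `examples/librispeech/run_tune.sh`:

```python
import numpy as np

def evaluate(alpha, beta):
    """Toy stand-in for decoding the dev set at the given (alpha, beta)
    and averaging the WER/CER; a simple convex surface for illustration."""
    return (alpha - 2.0) ** 2 + (beta - 0.3) ** 2

num_alphas, num_betas = 45, 8
cand_alphas = np.linspace(1.0, 3.2, num_alphas)   # alpha_from .. alpha_to
cand_betas = np.linspace(0.1, 0.45, num_betas)    # beta_from .. beta_to
params_grid = [(a, b) for a in cand_alphas for b in cand_betas]

# evaluate every point on the grid and keep the minimum
err_ave = [evaluate(alpha, beta) for alpha, beta in params_grid]
best_alpha, best_beta = params_grid[int(np.argmin(err_ave))]
print("opt (alpha, beta) = (%.3f, %.3f)" % (best_alpha, best_beta))
```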

-Usually, as the figure shows the variation of language model weight ($alpha$) mainly affect the performance of CTC beam search decoder. And a better procedure is first tuning on serveral data batches (the number can be specified) to find out the proper range of hyper-parameters, then change to the whole validataion set to carray out an accurate tuning. +Usually, as the figure shows, the variation of language model weight ($\alpha$) significantly affect the performance of CTC beam search decoder. And a better procedure is to first tune on serveral data batches (the number can be specified) to find out the proper range of hyper-parameters, then change to the whole validation set to carray out an accurate tuning. After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance. For more help From 7b9385899ba9308e29ac1d1c796ade284bbe0213 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 13:25:54 +0800 Subject: [PATCH 213/335] Add training script. --- examples/aishell/run_train.sh | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 examples/aishell/run_train.sh diff --git a/examples/aishell/run_train.sh b/examples/aishell/run_train.sh new file mode 100644 index 000000000..76d75fa29 --- /dev/null +++ b/examples/aishell/run_train.sh @@ -0,0 +1,41 @@ +#! /usr/bin/env bash + +pushd ../.. > /dev/null + +# train model +# if you wish to resume from an exists model, uncomment --init_model_path +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u train.py \ +--batch_size=64 \ +--trainer_count=8 \ +--num_passes=50 \ +--num_proc_data=12 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--num_iter_print=100 \ +--learning_rate=5e-4 \ +--max_duration=27.0 \ +--min_duration=0.0 \ +--test_off=False \ +--use_sortagrad=True \ +--use_gru=False \ +--use_gpu=True \ +--is_local=True \ +--share_rnn_weights=False \ +--train_manifest='data/aishell/manifest.train' \ +--dev_manifest='data/aishell/manifest.dev' \ +--mean_std_path='data/aishell/mean_std.npz' \ +--vocab_path='data/aishell/vocab.txt' \ +--output_model_dir='./checkpoints/aishell' \ +--augment_conf_path='conf/augmentation.config' \ +--specgram_type='linear' \ +--shuffle_method='batch_shuffle_clipped' + +if [ $? -ne 0 ]; then + echo "Failed in training!" + exit 1 +fi + + +exit 0 From a235b3532d9dd0f7b9d4546054df078652380c91 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 15:24:32 +0800 Subject: [PATCH 214/335] Add test script. --- examples/aishell/run_test.sh | 47 ++++++++++++++++++++++++ examples/aishell/run_test_golden.sh | 56 +++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 examples/aishell/run_test.sh create mode 100644 examples/aishell/run_test_golden.sh diff --git a/examples/aishell/run_test.sh b/examples/aishell/run_test.sh new file mode 100644 index 000000000..bfbbcad54 --- /dev/null +++ b/examples/aishell/run_test.sh @@ -0,0 +1,47 @@ +#! /usr/bin/env bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_ch.sh +if [ $? 
-ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# evaluate model +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u test.py \ +--batch_size=128 \ +--trainer_count=8 \ +--beam_size=300 \ +--num_proc_bsearch=8 \ +--num_proc_data=4 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--alpha=1.4 \ +--beta=2.4 \ +--cutoff_prob=0.99 \ +--cutoff_top_n=40 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=False \ +--test_manifest='data/aishell/manifest.test' \ +--mean_std_path='data/aishell/mean_std.npz' \ +--vocab_path='data/aishell/vocab.txt' \ +--model_path='checkpoints/aishell/params.latest.tar.gz' \ +--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='cer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/run_test_golden.sh new file mode 100644 index 000000000..e40e8040c --- /dev/null +++ b/examples/aishell/run_test_golden.sh @@ -0,0 +1,56 @@ +#! /usr/bin/env bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_ch.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/aishell > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# evaluate model +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +python -u test.py \ +--batch_size=128 \ +--trainer_count=8 \ +--beam_size=300 \ +--num_proc_bsearch=8 \ +--num_proc_data=4 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--alpha=1.4 \ +--beta=2.4 \ +--cutoff_prob=0.99 \ +--cutoff_top_n=40 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=False \ +--test_manifest='data/aishell/manifest.test' \ +--mean_std_path='models/aishell/mean_std.npz' \ +--vocab_path='models/aishell/vocab.txt' \ +--model_path='models/aishell/params.tar.gz' \ +--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='cer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + + +exit 0 From 0a5f123764d0a7546dcd583e32b1986db4215425 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 15:41:34 +0800 Subject: [PATCH 215/335] Add inference scripts. --- examples/aishell/run_infer.sh | 46 +++++++++++++++++++++++ examples/aishell/run_infer_golden.sh | 55 ++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 examples/aishell/run_infer.sh create mode 100644 examples/aishell/run_infer_golden.sh diff --git a/examples/aishell/run_infer.sh b/examples/aishell/run_infer.sh new file mode 100644 index 000000000..332bdbe1b --- /dev/null +++ b/examples/aishell/run_infer.sh @@ -0,0 +1,46 @@ +#! /usr/bin/env bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_ch.sh +if [ $? 
-ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# infer +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=300 \ +--num_proc_bsearch=8 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--alpha=1.4 \ +--beta=2.4 \ +--cutoff_prob=0.99 \ +--cutoff_top_n=40 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=False \ +--infer_manifest='data/aishell/manifest.test' \ +--mean_std_path='data/aishell/mean_std.npz' \ +--vocab_path='data/aishell/vocab.txt' \ +--model_path='checkpoints/aishell/params.latest.tar.gz' \ +--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='cer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" + exit 1 +fi + + +exit 0 diff --git a/examples/aishell/run_infer_golden.sh b/examples/aishell/run_infer_golden.sh new file mode 100644 index 000000000..ac79a4dde --- /dev/null +++ b/examples/aishell/run_infer_golden.sh @@ -0,0 +1,55 @@ +#! /usr/bin/env bash + +pushd ../.. > /dev/null + +# download language model +pushd models/lm > /dev/null +sh download_lm_ch.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# download well-trained model +pushd models/aishell > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +popd > /dev/null + + +# infer +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=300 \ +--num_proc_bsearch=8 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--alpha=1.4 \ +--beta=2.4 \ +--cutoff_prob=0.99 \ +--cutoff_top_n=40 \ +--use_gru=False \ +--use_gpu=True \ +--share_rnn_weights=False \ +--infer_manifest='data/aishell/manifest.test' \ +--mean_std_path='models/aishell/mean_std.npz' \ +--vocab_path='models/aishell/vocab.txt' \ +--model_path='models/aishell/params.tar.gz' \ +--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='cer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" + exit 1 +fi + + +exit 0 From 35543fff8bdc9daea987bb51046a26c67018c51d Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 16:44:25 +0800 Subject: [PATCH 216/335] Add doc for english LM. --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8b0da1ae2..38567c364 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,18 @@ sh download_lm_ch.sh ``` If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. +Here we provide some tips to show how we prepearing our english and mandarin language models. + +#### English LM + +The english corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our english languge model. There are some preprocessing steps before training: + + * Characters which not in [A-Za-z0-9\s'] are removed and arabic numbers are converted to english numbers like 1000 to one thousand. + * Repeated whitespace are squeezed to one and the beginning whitespace are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercases. + * Top 400000 words by frequency are selected to build the vocabulary and all words not in the vocabulary are replaced with 'UNKNOWNWORD'. 
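As a rough, hypothetical sketch of the cleaning steps listed above (the Arabic-number-to-words conversion is omitted, and this is not the exact script used to build the released model), the corpus preparation could look like:

```python
import re
from collections import Counter

def normalize_line(line):
    """Lowercase, keep only lowercase letters, digits, whitespace and
    apostrophes, and squeeze repeated whitespace."""
    line = line.lower()
    line = re.sub(r"[^a-z0-9\s']", '', line)
    return re.sub(r'\s+', ' ', line).strip()

def build_vocab(lines, vocab_size=400000):
    """Keep the top-N most frequent words as the vocabulary."""
    counter = Counter(word for line in lines for word in line.split())
    return set(word for word, _ in counter.most_common(vocab_size))

def replace_oov(line, vocab):
    """Map out-of-vocabulary words to 'UNKNOWNWORD'."""
    return ' '.join(w if w in vocab else 'UNKNOWNWORD' for w in line.split())
```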
+
+Now the preprocessing is done and we get a clean corpus to train the language model. Our released language model are pruned by '0 1 1 1 1'. To save disk storage we convert the arpa file to 'trie' binary file with parameters '-a 22 -q 8 -b 8'.
+
 TODO: any other requirements or tips to add?

 ### Speech-to-text Inference
@@ -296,7 +308,7 @@ The hyper-parameters $\alpha$ (language model weight) and $\beta$ (word insertio
 ```bash
 python tools/tune.py --use_gpu False
 ```
- The grid search will print the WER (word error rate) or CER (character error rate) at each point in the hyper-parameters space, and draw the error surface optionally. A proper hyper-parameters range should include the global minima of the error surface for WER/CER, as illustrated in the following figure.
+ The grid search will print the WER (word error rate) or CER (character error rate) at each point in the hyper-parameters space, and draw the error surface optionally. A proper hyper-parameters range should include the global minima of the error surface for WER/CER, as illustrated in the following figure.

From 4c64841c16b14f88a7100c80ccb6bc110a2c21de Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 19 Sep 2017 18:09:16 +0800 Subject: [PATCH 217/335] add tuning script & enable ploting error surface --- examples/librispeech/run_tune.sh | 22 ++--- tools/tune.py | 147 +++++++++++++++++++++++-------- 2 files changed, 121 insertions(+), 48 deletions(-) diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index abc28d366..48ddb0029 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -5,27 +5,29 @@ pushd ../.. > /dev/null # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u tools/tune.py \ ---num_samples=100 \ +--num_batches=2 \ +--batch_size=24 \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=12 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---num_alphas=14 \ ---num_betas=20 \ ---alpha_from=0.1 \ ---alpha_to=0.36 \ ---beta_from=0.05 \ ---beta_to=1.0 \ ---cutoff_prob=0.99 \ +--num_alphas=2 \ +--num_betas=2 \ +--alpha_from=1.0 \ +--alpha_to=3.2 \ +--beta_from=0.1 \ +--beta_to=0.45 \ +--cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ --tune_manifest='data/librispeech/manifest.dev-clean' \ --mean_std_path='data/librispeech/mean_std.npz' \ ---vocab_path='data/librispeech/vocab.txt' \ ---model_path='checkpoints/libri/params.latest.tar.gz' \ +--vocab_path='models/librispeech/vocab.txt' \ +--model_path='models/librispeech/params.tar.gz' \ --lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --error_rate_type='wer' \ --specgram_type='linear' diff --git a/tools/tune.py b/tools/tune.py index 96c25a3eb..c2e42cd9f 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -3,6 +3,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import sys import numpy as np import argparse import functools @@ -16,26 +17,30 @@ from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('num_samples', int, 100, "# of samples to infer.") -add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") -add_arg('beam_size', int, 500, "Beam search width.") -add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") -add_arg('num_conv_layers', int, 2, "# of convolution layers.") -add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") -add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('num_alphas', int, 14, "# of alpha candidates for tuning.") -add_arg('num_betas', int, 20, "# of beta candidates for tuning.") -add_arg('alpha_from', float, 0.1, "Where alpha starts tuning from.") -add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.") -add_arg('beta_from', float, 0.05, "Where beta starts tuning from.") -add_arg('beta_to', float, 1.0, "Where beta ends tuning with.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") -add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") -add_arg('use_gpu', bool, True, "Use GPU or not.") -add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " - "bi-directional RNNs. Not for GRU.") +add_arg('num_batches', int, -1, "# of batches tuning on. 
" + "Default -1, on whole dev set.") +add_arg('batch_size', int, 256, "# of samples per batch.") +add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") +add_arg('beam_size', int, 500, "Beam search width.") +add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('num_alphas', int, 45, "# of alpha candidates for tuning.") +add_arg('num_betas', int, 8, "# of beta candidates for tuning.") +add_arg('alpha_from', float, 1.0, "Where alpha starts tuning from.") +add_arg('alpha_to', float, 3.2, "Where alpha ends tuning with.") +add_arg('beta_from', float, 0.1, "Where beta starts tuning from.") +add_arg('beta_to', float, 0.45, "Where beta ends tuning with.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") +add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") +add_arg('output_fig', bool, True, "Output error rate figure or not.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. Not for GRU.") add_arg('tune_manifest', str, - 'data/librispeech/manifest.dev', + 'data/librispeech/manifest.dev-clean', "Filepath of manifest to tune.") add_arg('mean_std_path', str, 'data/librispeech/mean_std.npz', @@ -61,6 +66,23 @@ add_arg('specgram_type', str, # yapf: disable args = parser.parse_args() +def plot_error_surface(params_grid, err_ave, fig_name): + import matplotlib.pyplot as plt + import mpl_toolkits.mplot3d as Axes3D + fig = plt.figure() + ax = Axes3D(fig) + alphas = [ param[0] for param in params_grid ] + betas = [ param[1] for param in params_grid] + ALPHAS = np.reshape(alphas, (args.num_alphas, args.num_betas)) + BETAS = np.reshape(betas, (args.num_alphas, args.num_betas)) + ERR_AVE = np.reshape(err_ave, (args.num_alphas, args.num_betas)) + ax.plot_surface(ALPHAS, BETAS, WERS, + rstride=1, cstride=1, alpha=0.8, cmap='rainbow') + ax.set_xlabel('alpha') + ax.set_ylabel('beta') + z_label = 'WER' if args.error_rate_type == 'wer' else 'CER' + ax.set_zlabel(z_label) + plt.savefig(fig_name) def tune(): """Tune parameters alpha and beta on one minibatch.""" @@ -77,7 +99,7 @@ def tune(): num_threads=1) batch_reader = data_generator.batch_reader_creator( manifest_path=args.tune_manifest, - batch_size=args.num_samples, + batch_size=args.batch_size, sortagrad=False, shuffle_method=None) tune_data = batch_reader().next() @@ -95,31 +117,80 @@ def tune(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + # decoders only accept string encoded in utf-8 + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + + error_rate_func = cer if args.error_rate_type == 'cer' else wer # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) params_grid = [(alpha, beta) for alpha in cand_alphas for beta in cand_betas] - ## tune parameters in loop - for alpha, beta in params_grid: - result_transcripts = ds2_model.infer_batch( - infer_data=tune_data, - decoding_method='ctc_beam_search', - beam_alpha=alpha, - beam_beta=beta, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, - 
language_model_path=args.lang_model_path, - num_processes=args.num_proc_bsearch) - wer_sum, num_ins = 0.0, 0 - for target, result in zip(target_transcripts, result_transcripts): - wer_sum += wer(target, result) - num_ins += 1 - print("alpha = %f\tbeta = %f\tWER = %f" % - (alpha, beta, wer_sum / num_ins)) + err_sum = [0.0 for i in xrange(len(params_grid))] + err_ave = [0.0 for i in xrange(len(params_grid))] + num_ins, cur_batch = 0, 0 + ## incremental tuning parameters over multiple batches + for infer_data in batch_reader(): + if (args.num_batches >= 0) and (cur_batch >= args.num_batches): + break + + target_transcripts = [ + ''.join([data_generator.vocab_list[token] for token in transcript]) + for _, transcript in infer_data + ] + + num_ins += len(target_transcripts) + # grid search + for index, (alpha, beta) in enumerate(params_grid): + result_transcripts = ds2_model.infer_batch( + infer_data=infer_data, + decoding_method='ctc_beam_search', + beam_alpha=alpha, + beam_beta=beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, + language_model_path=args.lang_model_path, + num_processes=args.num_proc_bsearch) + + for target, result in zip(target_transcripts, result_transcripts): + err_sum[index] += error_rate_func(target, result) + err_ave[index] = err_sum[index] / num_ins + # print("alpha = %f, beta = %f, WER = %f" % + # (alpha, beta, err_ave[index])) + if index % 10 == 0: + sys.stdout.write('.') + sys.stdout.flush() + + # output on-line tuning result at the the end of current batch + err_ave_min = min(err_ave) + min_index = err_ave.index(err_ave_min) + print("\nBatch %d, opt.(alpha, beta) = (%f, %f), min. error_rate = %f" + %(cur_batch, params_grid[min_index][0], + params_grid[min_index][1], err_ave_min)) + cur_batch += 1 + + # output WER/CER at every point + print("\nerror rate at each point:\n") + for index in xrange(len(params_grid)): + print("(%f, %f), error_rate = %f" + % (params_grid[index][0], params_grid[index][1], err_ave[index])) + + err_ave_min = min(err_ave) + min_index = err_ave.index(err_ave_min) + print("\nTuning on %d batches, opt. (alpha, beta) = (%f, %f)" + % (args.num_batches, params_grid[min_index][0], + params_grid[min_index][1])) + + if args.output_fig == True: + fig_name = ("error_surface_alphas_%d_betas_%d" % + (args.num_alphas, args.num_betas)) + plot_error_surface(params_grid, err_ave, fig_name) + ds2_model.logger.info("output figure %s" % fig_name) + ds2_model.logger.info("finish inference") def main(): print_arguments(args) From a87e3d0f6126cace7e817628a33f027bf9f3212b Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 20:26:42 +0800 Subject: [PATCH 218/335] Refine doc. --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 38567c364..a70f84df8 100644 --- a/README.md +++ b/README.md @@ -217,19 +217,18 @@ cd models/lm sh download_lm_en.sh sh download_lm_ch.sh ``` -If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. -Here we provide some tips to show how we prepearing our english and mandarin language models. +If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. Here we provide some tips to show how we preparing our english and mandarin language models. You can take it as a reference when you train your own. 
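As a hypothetical illustration of the KenLM commands implied by the settings quoted in the sections below (the `lmplz` and `build_binary` binaries come from KenLM and are assumed to be on PATH, file names are placeholders, and the exact flag ordering should be checked against the KenLM documentation):

```python
import subprocess

# Train a 5-gram ARPA model, pruning singletons for orders two and higher.
with open('cleaned_corpus.txt') as fin, open('lm.arpa', 'w') as fout:
    subprocess.check_call(
        ['lmplz', '-o', '5', '--prune', '0', '1', '1', '1', '1'],
        stdin=fin, stdout=fout)

# Quantize and pack the ARPA file into a 'trie' binary to save disk space.
subprocess.check_call(
    ['build_binary', '-a', '22', '-q', '8', '-b', '8',
     'trie', 'lm.arpa', 'lm.trie.klm'])
```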
#### English LM The english corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our english languge model. There are some preprocessing steps before training: - * Characters which not in [A-Za-z0-9\s'] are removed and arabic numbers are converted to english numbers like 1000 to one thousand. - * Repeated whitespace are squeezed to one and the beginning whitespace are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercases. - * Top 400000 words by frequency are selected to build the vocabulary and all words not in the vocabulary are replaced with 'UNKNOWNWORD'. + * Characters not in \[A-Za-z0-9\s'\] (\s represents whitespace characters) are removed and arabic numbers are converted to english numbers like 1000 to one thousand. + * Repeated whitespace characters are squeezed to one and the beginning whitespace characters are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercase. + * Top 400,000 most frequent words are selected to build the vocabulary and the rest are replaced with 'UNKNOWNWORD'. -Now the preprocessing is done and we get a clean corpus to train the language model. Our released language model are pruned by '0 1 1 1 1'. To save disk storage we convert the arpa file to 'trie' binary file with parameters '-a 22 -q 8 -b 8'. +Now the preprocessing is done and we get a clean corpus to train the language model. Our released language model are trained with agruments '-o 5 --prune 0 1 1 1 1'. '-o 5' means the max order of language model is 5. '--prune 0 1 1 1 1' represents count thresholds for each order and more specifically it will prune singletons for orders two and higher. To save disk storage we convert the arpa file to 'trie' binary file with arguments '-a 22 -q 8 -b 8'. '-a' represents the maximum number of leading bits of pointers in 'trie' to chop. '-q -b' are quantization parameters for probability and backoff. TODO: any other requirements or tips to add? From 0057ca1fb57a2a7e862f1313fb5d9e19326455b0 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Sep 2017 22:03:00 +0800 Subject: [PATCH 219/335] Add doc for mandarin lm. --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a70f84df8..bbd1c885b 100644 --- a/README.md +++ b/README.md @@ -230,7 +230,15 @@ The english corpus is from the [Common Crawl Repository](http://commoncrawl.org) Now the preprocessing is done and we get a clean corpus to train the language model. Our released language model are trained with agruments '-o 5 --prune 0 1 1 1 1'. '-o 5' means the max order of language model is 5. '--prune 0 1 1 1 1' represents count thresholds for each order and more specifically it will prune singletons for orders two and higher. To save disk storage we convert the arpa file to 'trie' binary file with arguments '-a 22 -q 8 -b 8'. '-a' represents the maximum number of leading bits of pointers in 'trie' to chop. '-q -b' are quantization parameters for probability and backoff. -TODO: any other requirements or tips to add? +#### Mandarin LM + +Different from word-based language model, mandarin language model is character-based where each token is a chinese character. We use an internal corpus to train the released mandarin language model. This corpus contains billions of tokens. 
The preprocessing has small difference from english language model and all steps are: + + * The beginning and trailing whitespace characters are removed. + * English punctuations and chinese punctuations are removed. + * Insert a whitespace character between two tokens. + +Please notice that the released language model only contains chinese simplified characters. When preprocessing done we can begin to train the language model. The key training parameters are '-o 5 --prune 0 1 2 4 4'. Please refer above section for the meaning of each parameter. We also convert the arpa file to binary file using default settings. ### Speech-to-text Inference From e909396f91cead583dde8bf85c3bf70b3b5b82e1 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 21 Sep 2017 10:54:08 +0800 Subject: [PATCH 220/335] Refine doc. --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bbd1c885b..fca2528aa 100644 --- a/README.md +++ b/README.md @@ -218,13 +218,13 @@ sh download_lm_en.sh sh download_lm_ch.sh ``` -If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. Here we provide some tips to show how we preparing our english and mandarin language models. You can take it as a reference when you train your own. +If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials. Here we provide some tips to show how we preparing our English and Mandarin language models. You can take it as a reference when you train your own. #### English LM -The english corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our english languge model. There are some preprocessing steps before training: +The English corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our English languge model. There are some preprocessing steps before training: - * Characters not in \[A-Za-z0-9\s'\] (\s represents whitespace characters) are removed and arabic numbers are converted to english numbers like 1000 to one thousand. + * Characters not in \[A-Za-z0-9\s'\] (\s represents whitespace characters) are removed and Arabic numbers are converted to English numbers like 1000 to one thousand. * Repeated whitespace characters are squeezed to one and the beginning whitespace characters are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercase. * Top 400,000 most frequent words are selected to build the vocabulary and the rest are replaced with 'UNKNOWNWORD'. @@ -232,13 +232,13 @@ Now the preprocessing is done and we get a clean corpus to train the language mo #### Mandarin LM -Different from word-based language model, mandarin language model is character-based where each token is a chinese character. We use an internal corpus to train the released mandarin language model. This corpus contains billions of tokens. The preprocessing has small difference from english language model and all steps are: +Different from the English language model, Mandarin language model is character-based where each token is a Chinese character. We use an internal corpus to train the released Mandarin language model. This corpus contains billions of tokens. 
The preprocessing has tiny difference from English language model and main steps include: * The beginning and trailing whitespace characters are removed. - * English punctuations and chinese punctuations are removed. - * Insert a whitespace character between two tokens. + * English punctuations and Chinese punctuations are removed. + * A whitespace character between two tokens is inserted. -Please notice that the released language model only contains chinese simplified characters. When preprocessing done we can begin to train the language model. The key training parameters are '-o 5 --prune 0 1 2 4 4'. Please refer above section for the meaning of each parameter. We also convert the arpa file to binary file using default settings. +Please notice that the released language model only contains Chinese simplified characters. After preprocessing done we can begin to train the language model. The key training arguments are '-o 5 --prune 0 1 2 4 4'. Please refer above section for the meaning of each argument. We also convert the arpa file to binary file using default settings. ### Speech-to-text Inference From 0dcf13f0fc69d67c96841d71f7b60fed40b05a29 Mon Sep 17 00:00:00 2001 From: lispczz Date: Thu, 21 Sep 2017 16:02:43 +0800 Subject: [PATCH 221/335] fix a deep speech 2 speed bug --- data_utils/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_utils/data.py b/data_utils/data.py index 8bff6826d..7ddf1f339 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -327,7 +327,7 @@ class DataGenerator(object): shift_len = self._rng.randint(0, batch_size - 1) batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) self._rng.shuffle(batch_manifest) - batch_manifest = list(sum(batch_manifest, ())) + batch_manifest = [item for batch in batch_manifest for item in batch] if not clipped: res_len = len(manifest) - shift_len - len(batch_manifest) batch_manifest.extend(manifest[-res_len:]) From e6e7b13222fb642dc0757b91305f1c6042edf64b Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 25 Sep 2017 18:28:44 +0800 Subject: [PATCH 222/335] remove the log praser in tuning script --- examples/librispeech/run_tune.sh | 10 +++---- examples/tiny/run_tune.sh | 18 +++++++----- tools/tune.py | 50 ++++++++++---------------------- 3 files changed, 30 insertions(+), 48 deletions(-) diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index 48ddb0029..1f76ad700 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -3,18 +3,18 @@ pushd ../.. > /dev/null # grid-search for hyper-parameters in language model -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ +CUDA_VISIBLE_DEVICES=0,1,2,3 \ python -u tools/tune.py \ ---num_batches=2 \ ---batch_size=24 \ +--num_batches=-1 \ +--batch_size=256 \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=12 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---num_alphas=2 \ ---num_betas=2 \ +--num_alphas=45 \ +--num_betas=8 \ --alpha_from=1.0 \ --alpha_to=3.2 \ --beta_from=0.1 \ diff --git a/examples/tiny/run_tune.sh b/examples/tiny/run_tune.sh index 926e9f8d5..564da4acd 100644 --- a/examples/tiny/run_tune.sh +++ b/examples/tiny/run_tune.sh @@ -5,20 +5,22 @@ pushd ../.. 
> /dev/null # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u tools/tune.py \ ---num_samples=100 \ +--num_batches=1 \ +--batch_size=24 \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=12 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---num_alphas=14 \ ---num_betas=20 \ ---alpha_from=0.1 \ ---alpha_to=0.36 \ ---beta_from=0.05 \ ---beta_to=1.0 \ ---cutoff_prob=0.99 \ +--num_alphas=45 \ +--num_betas=8 \ +--alpha_from=1.0 \ +--alpha_to=3.2 \ +--beta_from=0.1 \ +--beta_to=0.45 \ +--cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/tools/tune.py b/tools/tune.py index c2e42cd9f..f03f88381 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -34,7 +34,6 @@ add_arg('beta_from', float, 0.1, "Where beta starts tuning from.") add_arg('beta_to', float, 0.45, "Where beta ends tuning with.") add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") -add_arg('output_fig', bool, True, "Output error rate figure or not.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " @@ -66,26 +65,9 @@ add_arg('specgram_type', str, # yapf: disable args = parser.parse_args() -def plot_error_surface(params_grid, err_ave, fig_name): - import matplotlib.pyplot as plt - import mpl_toolkits.mplot3d as Axes3D - fig = plt.figure() - ax = Axes3D(fig) - alphas = [ param[0] for param in params_grid ] - betas = [ param[1] for param in params_grid] - ALPHAS = np.reshape(alphas, (args.num_alphas, args.num_betas)) - BETAS = np.reshape(betas, (args.num_alphas, args.num_betas)) - ERR_AVE = np.reshape(err_ave, (args.num_alphas, args.num_betas)) - ax.plot_surface(ALPHAS, BETAS, WERS, - rstride=1, cstride=1, alpha=0.8, cmap='rainbow') - ax.set_xlabel('alpha') - ax.set_ylabel('beta') - z_label = 'WER' if args.error_rate_type == 'wer' else 'CER' - ax.set_zlabel(z_label) - plt.savefig(fig_name) def tune(): - """Tune parameters alpha and beta on one minibatch.""" + """Tune parameters alpha and beta incrementally.""" if not args.num_alphas >= 0: raise ValueError("num_alphas must be non-negative!") if not args.num_betas >= 0: @@ -160,38 +142,36 @@ def tune(): err_ave[index] = err_sum[index] / num_ins # print("alpha = %f, beta = %f, WER = %f" % # (alpha, beta, err_ave[index])) - if index % 10 == 0: + if index % 2 == 0: sys.stdout.write('.') sys.stdout.flush() # output on-line tuning result at the the end of current batch err_ave_min = min(err_ave) min_index = err_ave.index(err_ave_min) - print("\nBatch %d, opt.(alpha, beta) = (%f, %f), min. 
error_rate = %f" - %(cur_batch, params_grid[min_index][0], - params_grid[min_index][1], err_ave_min)) + print("\nBatch %d [%d/?], current opt (alpha, beta) = (%s, %s), " + " min [%s] = %f" %(cur_batch, num_ins, + "%.3f" % params_grid[min_index][0], + "%.3f" % params_grid[min_index][1], + args.error_rate_type, err_ave_min)) cur_batch += 1 # output WER/CER at every point - print("\nerror rate at each point:\n") + print("\nFinal %s:\n" % args.error_rate_type) for index in xrange(len(params_grid)): - print("(%f, %f), error_rate = %f" - % (params_grid[index][0], params_grid[index][1], err_ave[index])) + print("(alpha, beta) = (%s, %s), [%s] = %f" + % ("%.3f" % params_grid[index][0], "%.3f" % params_grid[index][1], + args.error_rate_type, err_ave[index])) err_ave_min = min(err_ave) min_index = err_ave.index(err_ave_min) - print("\nTuning on %d batches, opt. (alpha, beta) = (%f, %f)" - % (args.num_batches, params_grid[min_index][0], - params_grid[min_index][1])) - - if args.output_fig == True: - fig_name = ("error_surface_alphas_%d_betas_%d" % - (args.num_alphas, args.num_betas)) - plot_error_surface(params_grid, err_ave, fig_name) - ds2_model.logger.info("output figure %s" % fig_name) + print("\nFinish tuning on %d batches, final opt (alpha, beta) = (%s, %s)" + % (args.num_batches, "%.3f" % params_grid[min_index][0], + "%.3f" % params_grid[min_index][1])) ds2_model.logger.info("finish inference") + def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) From 764ce62445473ec1e91cf9867628b7f5e287a621 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 25 Sep 2017 18:35:16 +0800 Subject: [PATCH 223/335] clean code in tuning script --- tools/tune.py | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/tools/tune.py b/tools/tune.py index f03f88381..e0721a449 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -17,27 +17,27 @@ from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('num_batches', int, -1, "# of batches tuning on. " - "Default -1, on whole dev set.") -add_arg('batch_size', int, 256, "# of samples per batch.") -add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") -add_arg('beam_size', int, 500, "Beam search width.") -add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") -add_arg('num_conv_layers', int, 2, "# of convolution layers.") -add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") -add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('num_alphas', int, 45, "# of alpha candidates for tuning.") -add_arg('num_betas', int, 8, "# of beta candidates for tuning.") -add_arg('alpha_from', float, 1.0, "Where alpha starts tuning from.") -add_arg('alpha_to', float, 3.2, "Where alpha ends tuning with.") -add_arg('beta_from', float, 0.1, "Where beta starts tuning from.") -add_arg('beta_to', float, 0.45, "Where beta ends tuning with.") -add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") -add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") -add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") -add_arg('use_gpu', bool, True, "Use GPU or not.") -add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " - "bi-directional RNNs. Not for GRU.") +add_arg('num_batches', int, -1, "# of batches tuning on. 
" + "Default -1, on whole dev set.") +add_arg('batch_size', int, 256, "# of samples per batch.") +add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") +add_arg('beam_size', int, 500, "Beam search width.") +add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") +add_arg('num_conv_layers', int, 2, "# of convolution layers.") +add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") +add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") +add_arg('num_alphas', int, 45, "# of alpha candidates for tuning.") +add_arg('num_betas', int, 8, "# of beta candidates for tuning.") +add_arg('alpha_from', float, 1.0, "Where alpha starts tuning from.") +add_arg('alpha_to', float, 3.2, "Where alpha ends tuning with.") +add_arg('beta_from', float, 0.1, "Where beta starts tuning from.") +add_arg('beta_to', float, 0.45, "Where beta ends tuning with.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") +add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") +add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") +add_arg('use_gpu', bool, True, "Use GPU or not.") +add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " + "bi-directional RNNs. Not for GRU.") add_arg('tune_manifest', str, 'data/librispeech/manifest.dev-clean', "Filepath of manifest to tune.") @@ -140,13 +140,11 @@ def tune(): for target, result in zip(target_transcripts, result_transcripts): err_sum[index] += error_rate_func(target, result) err_ave[index] = err_sum[index] / num_ins - # print("alpha = %f, beta = %f, WER = %f" % - # (alpha, beta, err_ave[index])) if index % 2 == 0: sys.stdout.write('.') sys.stdout.flush() - # output on-line tuning result at the the end of current batch + # output on-line tuning result at the end of current batch err_ave_min = min(err_ave) min_index = err_ave.index(err_ave_min) print("\nBatch %d [%d/?], current opt (alpha, beta) = (%s, %s), " @@ -156,7 +154,7 @@ def tune(): args.error_rate_type, err_ave_min)) cur_batch += 1 - # output WER/CER at every point + # output WER/CER at every (alpha, beta) print("\nFinal %s:\n" % args.error_rate_type) for index in xrange(len(params_grid)): print("(alpha, beta) = (%s, %s), [%s] = %f" From bad7e7fa24baa6c69562f1be68086e67ae9fa421 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 25 Sep 2017 18:44:16 +0800 Subject: [PATCH 224/335] add the import of cer in tuning script --- tools/tune.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tune.py b/tools/tune.py index e0721a449..b1d7709df 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -11,7 +11,7 @@ import paddle.v2 as paddle import _init_paths from data_utils.data import DataGenerator from model_utils.model import DeepSpeech2Model -from utils.error_rate import wer +from utils.error_rate import wer, cer from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) From 1db71425c26f92990c58d781ff61fecc8711a285 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 27 Sep 2017 12:02:11 +0800 Subject: [PATCH 225/335] avoid repeated infer for same batch in the tuning of DS2 --- tools/tune.py | 96 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/tools/tune.py b/tools/tune.py index b1d7709df..46aa0e52a 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -7,10 +7,14 @@ import sys import numpy as np import argparse import functools +import gzip +import logging import paddle.v2 as paddle import 
_init_paths from data_utils.data import DataGenerator -from model_utils.model import DeepSpeech2Model +from decoders.swig_wrapper import Scorer +from decoders.swig_wrapper import ctc_beam_search_decoder_batch +from model_utils.model import deep_speech_v2_network from utils.error_rate import wer, cer from utils.utility import add_arguments, print_arguments @@ -66,6 +70,9 @@ add_arg('specgram_type', str, args = parser.parse_args() +logging.basicConfig( + format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s') + def tune(): """Tune parameters alpha and beta incrementally.""" if not args.num_alphas >= 0: @@ -79,29 +86,55 @@ def tune(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1) + + audio_data = paddle.layer.data( + name="audio_spectrogram", + type=paddle.data_type.dense_array(161 * 161)) + text_data = paddle.layer.data( + name="transcript_text", + type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) + + output_probs, _ = deep_speech_v2_network( + audio_data=audio_data, + text_data=text_data, + dict_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size, + use_gru=args.use_gru, + share_rnn_weights=args.share_rnn_weights) + batch_reader = data_generator.batch_reader_creator( manifest_path=args.tune_manifest, batch_size=args.batch_size, sortagrad=False, shuffle_method=None) - tune_data = batch_reader().next() - target_transcripts = [ - ''.join([data_generator.vocab_list[token] for token in transcript]) - for _, transcript in tune_data - ] - - ds2_model = DeepSpeech2Model( - vocab_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_layer_size=args.rnn_layer_size, - use_gru=args.use_gru, - pretrained_model_path=args.model_path, - share_rnn_weights=args.share_rnn_weights) + # load parameters + parameters = paddle.parameters.Parameters.from_tar( + gzip.open(args.model_path)) + + inferer = paddle.inference.Inference( + output_layer=output_probs, parameters=parameters) # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + # init logger + logger = logging.getLogger("") + logger.setLevel(level=logging.INFO) + # init external scorer + logger.info("begin to initialize the external scorer for tuning") + ext_scorer = Scorer( + alpha=args.alpha_from, + beta=args.beta_from, + model_path=args.lang_model_path, + vocabulary=vocab_list) + logger.info("language model: " + "is_character_based = %d," % ext_scorer.is_character_based() + + " max_order = %d," % ext_scorer.get_max_order() + + " dict_size = %d" % ext_scorer.get_dict_size()) + logger.info("end initializing scorer. 
Start tuning ...") + error_rate_func = cer if args.error_rate_type == 'cer' else wer # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) @@ -116,6 +149,13 @@ def tune(): for infer_data in batch_reader(): if (args.num_batches >= 0) and (cur_batch >= args.num_batches): break + infer_results = inferer.infer(input=infer_data) + + num_steps = len(infer_results) // len(infer_data) + probs_split = [ + infer_results[i * num_steps:(i + 1) * num_steps] + for i in xrange(len(infer_data)) + ] target_transcripts = [ ''.join([data_generator.vocab_list[token] for token in transcript]) @@ -125,18 +165,18 @@ def tune(): num_ins += len(target_transcripts) # grid search for index, (alpha, beta) in enumerate(params_grid): - result_transcripts = ds2_model.infer_batch( - infer_data=infer_data, - decoding_method='ctc_beam_search', - beam_alpha=alpha, - beam_beta=beta, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - vocab_list=vocab_list, - language_model_path=args.lang_model_path, - num_processes=args.num_proc_bsearch) - + # reset alpha & beta + ext_scorer.reset_params(alpha, beta) + beam_search_results = ctc_beam_search_decoder_batch( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=args.beam_size, + num_processes=args.num_proc_bsearch, + cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, + ext_scoring_func=ext_scorer, ) + + result_transcripts = [res[0][1] for res in beam_search_results] for target, result in zip(target_transcripts, result_transcripts): err_sum[index] += error_rate_func(target, result) err_ave[index] = err_sum[index] / num_ins @@ -167,7 +207,7 @@ def tune(): % (args.num_batches, "%.3f" % params_grid[min_index][0], "%.3f" % params_grid[min_index][1])) - ds2_model.logger.info("finish inference") + logger.info("finish tuning") def main(): From 7d8402ab14153e0eb93dec71b79b9b414b9bfa8c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 27 Sep 2017 13:58:24 +0800 Subject: [PATCH 226/335] format the indent in tuning script --- tools/tune.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/tune.py b/tools/tune.py index 46aa0e52a..69c10860d 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -88,21 +88,21 @@ def tune(): num_threads=1) audio_data = paddle.layer.data( - name="audio_spectrogram", - type=paddle.data_type.dense_array(161 * 161)) + name="audio_spectrogram", + type=paddle.data_type.dense_array(161 * 161)) text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) + name="transcript_text", + type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) output_probs, _ = deep_speech_v2_network( - audio_data=audio_data, - text_data=text_data, - dict_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - use_gru=args.use_gru, - share_rnn_weights=args.share_rnn_weights) + audio_data=audio_data, + text_data=text_data, + dict_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_size=args.rnn_layer_size, + use_gru=args.use_gru, + share_rnn_weights=args.share_rnn_weights) batch_reader = data_generator.batch_reader_creator( manifest_path=args.tune_manifest, @@ -168,13 +168,13 @@ def tune(): # reset alpha & beta ext_scorer.reset_params(alpha, beta) beam_search_results = 
ctc_beam_search_decoder_batch( - probs_split=probs_split, - vocabulary=vocab_list, - beam_size=args.beam_size, - num_processes=args.num_proc_bsearch, - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - ext_scoring_func=ext_scorer, ) + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=args.beam_size, + num_processes=args.num_proc_bsearch, + cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, + ext_scoring_func=ext_scorer, ) result_transcripts = [res[0][1] for res in beam_search_results] for target, result in zip(target_transcripts, result_transcripts): From 70e43c184cb969bc5463a9c0253847d2a7e8219f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 27 Sep 2017 15:40:46 +0800 Subject: [PATCH 227/335] add model path check in tuning script --- tools/tune.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/tune.py b/tools/tune.py index 69c10860d..85c2d7388 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -4,6 +4,7 @@ from __future__ import division from __future__ import print_function import sys +import os import numpy as np import argparse import functools @@ -111,6 +112,8 @@ def tune(): shuffle_method=None) # load parameters + if not os.path.isfile(args.model_path): + raise IOError("Invaid model path: %s" % args.model_path) parameters = paddle.parameters.Parameters.from_tar( gzip.open(args.model_path)) @@ -124,6 +127,8 @@ def tune(): logger.setLevel(level=logging.INFO) # init external scorer logger.info("begin to initialize the external scorer for tuning") + if not os.path.isfile(args.lang_model_path): + raise IOError("Invaid language model path: %s" % args.lang_model_path) ext_scorer = Scorer( alpha=args.alpha_from, beta=args.beta_from, From 3663ba57d062f6a5f48369c63e5f4982bd7bc198 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 27 Sep 2017 17:51:52 +0800 Subject: [PATCH 228/335] make *.sh files more robust in DS2 --- examples/aishell/run_data.sh | 2 +- examples/aishell/run_infer.sh | 6 +++--- examples/aishell/run_infer_golden.sh | 10 +++++----- examples/aishell/run_test.sh | 6 +++--- examples/aishell/run_test_golden.sh | 10 +++++----- examples/aishell/run_train.sh | 2 +- examples/librispeech/run_data.sh | 2 +- examples/librispeech/run_infer.sh | 6 +++--- examples/librispeech/run_infer_golden.sh | 10 +++++----- examples/librispeech/run_test.sh | 6 +++--- examples/librispeech/run_test_golden.sh | 10 +++++----- examples/librispeech/run_train.sh | 2 +- examples/librispeech/run_tune.sh | 2 +- examples/mandarin/run_demo_client.sh | 2 +- examples/mandarin/run_demo_server.sh | 10 +++++----- examples/tiny/run_data.sh | 2 +- examples/tiny/run_infer.sh | 6 +++--- examples/tiny/run_infer_golden.sh | 10 +++++----- examples/tiny/run_test.sh | 6 +++--- examples/tiny/run_test_golden.sh | 10 +++++----- examples/tiny/run_train.sh | 2 +- examples/tiny/run_tune.sh | 2 +- models/aishell/download_model.sh | 2 +- models/librispeech/download_model.sh | 2 +- models/lm/download_lm_ch.sh | 2 +- models/lm/download_lm_en.sh | 2 +- 26 files changed, 66 insertions(+), 66 deletions(-) diff --git a/examples/aishell/run_data.sh b/examples/aishell/run_data.sh index db27c5300..8bacf6d80 100644 --- a/examples/aishell/run_data.sh +++ b/examples/aishell/run_data.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. 
> /dev/null # download data, generate manifests PYTHONPATH=.:$PYTHONPATH python data/aishell/aishell.py \ diff --git a/examples/aishell/run_infer.sh b/examples/aishell/run_infer.sh index 332bdbe1b..404555e8b 100644 --- a/examples/aishell/run_infer.sh +++ b/examples/aishell/run_infer.sh @@ -1,14 +1,14 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # infer diff --git a/examples/aishell/run_infer_golden.sh b/examples/aishell/run_infer_golden.sh index ac79a4dde..4701bdaac 100644 --- a/examples/aishell/run_infer_golden.sh +++ b/examples/aishell/run_infer_golden.sh @@ -1,23 +1,23 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/aishell > /dev/null +cd models/aishell > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # infer diff --git a/examples/aishell/run_test.sh b/examples/aishell/run_test.sh index bfbbcad54..266c73676 100644 --- a/examples/aishell/run_test.sh +++ b/examples/aishell/run_test.sh @@ -1,14 +1,14 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # evaluate model diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/run_test_golden.sh index e40e8040c..50e7fe2c6 100644 --- a/examples/aishell/run_test_golden.sh +++ b/examples/aishell/run_test_golden.sh @@ -1,23 +1,23 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_ch.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/aishell > /dev/null +cd models/aishell > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # evaluate model diff --git a/examples/aishell/run_train.sh b/examples/aishell/run_train.sh index 76d75fa29..8e61ec3c2 100644 --- a/examples/aishell/run_train.sh +++ b/examples/aishell/run_train.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # train model # if you wish to resume from an exists model, uncomment --init_model_path diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh index 957416f43..b1ad68ab3 100644 --- a/examples/librispeech/run_data.sh +++ b/examples/librispeech/run_data.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download data, generate manifests PYTHONPATH=.:$PYPYTHONPATH python data/librispeech/librispeech.py \ diff --git a/examples/librispeech/run_infer.sh b/examples/librispeech/run_infer.sh index 85587ed47..2df5b6cc4 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/librispeech/run_infer.sh @@ -1,14 +1,14 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? 
-ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # infer diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh index 8feca555e..c407cabe4 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/librispeech/run_infer_golden.sh @@ -1,23 +1,23 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/librispeech > /dev/null +cd models/librispeech > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # infer diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index d75848b00..d79a22563 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -1,14 +1,14 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # evaluate model diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index 352a94156..011cdd2ab 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -1,23 +1,23 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/librispeech > /dev/null +cd models/librispeech > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # evaluate model diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 1d18f29ef..69251fe0c 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # train model # if you wish to resume from an exists model, uncomment --init_model_path diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index 1f76ad700..78b2972d0 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3 \ diff --git a/examples/mandarin/run_demo_client.sh b/examples/mandarin/run_demo_client.sh index bf8e54514..6ae4ddcab 100644 --- a/examples/mandarin/run_demo_client.sh +++ b/examples/mandarin/run_demo_client.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # start demo client CUDA_VISIBLE_DEVICES=0 \ diff --git a/examples/mandarin/run_demo_server.sh b/examples/mandarin/run_demo_server.sh index b0d4bc7f1..d98a99d01 100644 --- a/examples/mandarin/run_demo_server.sh +++ b/examples/mandarin/run_demo_server.sh @@ -1,24 +1,24 @@ #! /usr/bin/env bash # TODO: replace the model with a mandarin model -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? 
-ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/librispeech > /dev/null +cd models/librispeech > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # start demo server diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh index a98dab214..0b03ec5d1 100644 --- a/examples/tiny/run_data.sh +++ b/examples/tiny/run_data.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # prepare folder if [ ! -e data/tiny ]; then diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index 85b083a27..cbd049a6b 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -1,14 +1,14 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # infer diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh index 3ee2f9aef..6567542be 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/tiny/run_infer_golden.sh @@ -1,23 +1,23 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/librispeech > /dev/null +cd models/librispeech > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # infer diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index 063076328..690dfe739 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -1,14 +1,14 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # evaluate model diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 351cb87cb..16adf2988 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -1,23 +1,23 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # download language model -pushd models/lm > /dev/null +cd models/lm > /dev/null sh download_lm_en.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # download well-trained model -pushd models/librispeech > /dev/null +cd models/librispeech > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 fi -popd > /dev/null +cd - > /dev/null # evaluate model diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index 957aa63bc..88b09bee5 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. > /dev/null # train model # if you wish to resume from an exists model, uncomment --init_model_path diff --git a/examples/tiny/run_tune.sh b/examples/tiny/run_tune.sh index 564da4acd..89f8adf45 100644 --- a/examples/tiny/run_tune.sh +++ b/examples/tiny/run_tune.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -pushd ../.. > /dev/null +cd ../.. 
> /dev/null # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ diff --git a/models/aishell/download_model.sh b/models/aishell/download_model.sh index 77fc84b53..19aec554e 100644 --- a/models/aishell/download_model.sh +++ b/models/aishell/download_model.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -source ../../utils/utility.sh +. ../../utils/utility.sh URL='http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274' MD5=28521a58552885a81cf92a1e9b133a71 diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 7c46c0991..0048ff166 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -source ../../utils/utility.sh +. ../../utils/utility.sh URL='http://cloud.dlnel.org/filepub/?uuid=8e3cf742-2ff3-41ce-a49d-f6158cc06a23' MD5=2ef08f8b608a7c555592161fc14d81a6 diff --git a/models/lm/download_lm_ch.sh b/models/lm/download_lm_ch.sh index 46bfe9329..c719f9a9f 100644 --- a/models/lm/download_lm_ch.sh +++ b/models/lm/download_lm_ch.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -source ../../utils/utility.sh +. ../../utils/utility.sh URL=http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e MD5="29e02312deb2e59b3c8686c7966d4fe3" diff --git a/models/lm/download_lm_en.sh b/models/lm/download_lm_en.sh index fbfe647e9..d131636e8 100644 --- a/models/lm/download_lm_en.sh +++ b/models/lm/download_lm_en.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -source ../../utils/utility.sh +. ../../utils/utility.sh URL=http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm MD5="099a601759d467cd0a8523ff939819c5" From 8dcf1d56670d5708d8b234ee4af0c4dc7544d650 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 27 Sep 2017 18:44:13 +0800 Subject: [PATCH 229/335] fix bugs in preparing data scripts --- examples/librispeech/run_data.sh | 2 +- examples/tiny/run_data.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh index b1ad68ab3..12f2dc6d5 100644 --- a/examples/librispeech/run_data.sh +++ b/examples/librispeech/run_data.sh @@ -3,7 +3,7 @@ cd ../.. > /dev/null # download data, generate manifests -PYTHONPATH=.:$PYPYTHONPATH python data/librispeech/librispeech.py \ +PYTHONPATH=.:$PYTHONPATH python data/librispeech/librispeech.py \ --manifest_prefix='data/librispeech/manifest' \ --target_dir='~/.cache/paddle/dataset/speech/Libri' \ --full_download='True' diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh index 0b03ec5d1..ba55d284a 100644 --- a/examples/tiny/run_data.sh +++ b/examples/tiny/run_data.sh @@ -9,7 +9,7 @@ fi # download data, generate manifests -python data/librispeech/librispeech.py \ +PYTHONPATH=.:$PYTHONPATH python data/librispeech/librispeech.py \ --manifest_prefix='data/tiny/manifest' \ --target_dir='~/.cache/paddle/dataset/speech/libri' \ --full_download='False' @@ -26,7 +26,7 @@ head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny python tools/build_vocab.py \ --count_threshold=0 \ --vocab_path='data/tiny/vocab.txt' \ ---manifest_paths='data/tiny/manifest.dev' +--manifest_paths='data/tiny/manifest.dev-clean' if [ $? -ne 0 ]; then echo "Build vocabulary failed. Terminated." From 42c58daf5fa80c35d9aa5cd1ecee0ba157629fff Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 28 Sep 2017 11:03:47 +0800 Subject: [PATCH 230/335] Fix bugs for demo_server.py. 
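The new `deploy/_init_paths.py` exists because `demo_server.py` is normally launched directly (`python deploy/demo_server.py ...`), in which case Python only puts `deploy/` itself on `sys.path` and imports such as `data_utils.data` fail. The helper must be imported before any project-level package; a rough sketch of the intended import order in a `deploy/` script (only the import order matters here, the rest is illustrative):

```
"""Illustrative skeleton of a deploy/ script -- only the import order matters."""
import argparse

import _init_paths  # must come first: it puts the repository root on sys.path
from data_utils.data import DataGenerator
from model_utils.model import DeepSpeech2Model
from utils.utility import add_arguments, print_arguments

parser = argparse.ArgumentParser(description=__doc__)
# ... argument definitions and server start-up go here ...
```

Without this bootstrap the same imports only resolve when the script is run with `PYTHONPATH=.` from the repository root, which is what the example shell scripts do.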
--- deploy/_init_paths.py | 19 +++++++++++++++++++ deploy/demo_server.py | 8 ++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 deploy/_init_paths.py diff --git a/deploy/_init_paths.py b/deploy/_init_paths.py new file mode 100644 index 000000000..ddabb535b --- /dev/null +++ b/deploy/_init_paths.py @@ -0,0 +1,19 @@ +"""Set up paths for DS2""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path +import sys + + +def add_path(path): + if path not in sys.path: + sys.path.insert(0, path) + + +this_dir = os.path.dirname(__file__) + +# Add project path to PYTHONPATH +proj_path = os.path.join(this_dir, '..') +add_path(proj_path) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index 7c5584191..e3cc6705b 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -12,7 +12,7 @@ import paddle.v2 as paddle import _init_paths from data_utils.data import DataGenerator from model_utils.model import DeepSpeech2Model -from data_utils.utils import read_manifest +from data_utils.utility import read_manifest from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) @@ -26,6 +26,7 @@ add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") add_arg('alpha', float, 0.36, "Coef of LM for beam search.") add_arg('beta', float, 0.25, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across " @@ -156,6 +157,8 @@ def start_server(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + # prepare ASR inference handler def file_to_transcript(filename): feature = data_generator.process_utterance(filename, "") @@ -166,7 +169,8 @@ def start_server(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=1) return result_transcript[0] From 84f56118b49b9c1056b77788a1d761bf01a4ae1a Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 28 Sep 2017 11:09:30 +0800 Subject: [PATCH 231/335] Change default value. 
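For context on the new defaults: `alpha` weights the external language model and `beta` rewards word count when beam candidates are rescored, and `alpha=2.15`, `beta=0.35` are the values already used by the LibriSpeech example scripts. Schematically, each beam prefix competes with a score like the sketch below; the real bookkeeping lives in the swig decoder, so treat this only as an illustration:

```
import math

def prefix_score(log_p_ctc, lm_prob, num_words, alpha=2.15, beta=0.35):
    """Illustrative scoring of one beam prefix.

    log_p_ctc -- CTC log-probability of the prefix
    lm_prob   -- language-model probability of the prefix (assumed > 0)
    num_words -- number of complete words in the prefix
    """
    return log_p_ctc + alpha * math.log(lm_prob) + beta * num_words

# Larger alpha trusts the language model more; beta counteracts the LM's bias
# towards short transcriptions.
print(prefix_score(-12.3, 1e-4, 5))
```

Setting `cutoff_prob=1.0` effectively disables probability-based pruning of characters before the search, while the separate `cutoff_top_n` cap still applies.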
--- deploy/demo_server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index e3cc6705b..b007c751e 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -23,9 +23,9 @@ add_arg('beam_size', int, 500, "Beam search width.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('alpha', float, 0.36, "Coef of LM for beam search.") -add_arg('beta', float, 0.25, "Coef of WC for beam search.") -add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.") +add_arg('alpha', float, 2.15, "Coef of LM for beam search.") +add_arg('beta', float, 0.35, "Coef of WC for beam search.") +add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") add_arg('use_gpu', bool, True, "Use GPU or not.") From 511c4540082d8241fcf4f5f10d3c14004c6c6599 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 29 Sep 2017 11:52:10 +0800 Subject: [PATCH 232/335] Bug fix. --- model_utils/model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/model_utils/model.py b/model_utils/model.py index 67a41bd11..123eed9b4 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -129,9 +129,10 @@ class DeepSpeech2Model(object): else: result = trainer.test( reader=dev_batch_reader, feeding=feeding_dict) - print("\n------- Time: %d sec, Pass: %d, " - "ValidationCost: %s" % - (time.time() - start_time, event.pass_id, 0)) + print( + "\n------- Time: %d sec, Pass: %d, " + "ValidationCost: %s" % + (time.time() - start_time, event.pass_id, result.cost)) output_model_path = os.path.join( output_model_dir, "params.pass-%d.tar.gz" % event.pass_id) with gzip.open(output_model_path, 'w') as f: From 2c63f3413f7804d497a9a278bc225e2f8a3a92e5 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 29 Sep 2017 19:55:05 +0800 Subject: [PATCH 233/335] convert decoding results to unicode in DS2 --- decoders/swig_wrapper.py | 18 +++++++++++++----- examples/tiny/run_train.sh | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py index 0a9211258..21aed03c1 100644 --- a/decoders/swig_wrapper.py +++ b/decoders/swig_wrapper.py @@ -35,7 +35,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary): :return: Decoding result string. :rtype: basestring """ - return swig_decoders.ctc_greedy_decoder(probs_seq.tolist(), vocabulary) + result = swig_decoders.ctc_greedy_decoder(probs_seq.tolist(), vocabulary) + return result.decode('utf-8') def ctc_beam_search_decoder(probs_seq, @@ -69,9 +70,11 @@ def ctc_beam_search_decoder(probs_seq, results, in descending order of the probability. 
:rtype: list """ - return swig_decoders.ctc_beam_search_decoder(probs_seq.tolist(), vocabulary, - beam_size, cutoff_prob, - cutoff_top_n, ext_scoring_func) + beam_results = swig_decoders.ctc_beam_search_decoder( + probs_seq.tolist(), vocabulary, beam_size, cutoff_prob, cutoff_top_n, + ext_scoring_func) + beam_results = [(res[0], res[1].decode('utf-8')) for res in beam_results] + return beam_results def ctc_beam_search_decoder_batch(probs_split, @@ -111,6 +114,11 @@ def ctc_beam_search_decoder_batch(probs_split, """ probs_split = [probs_seq.tolist() for probs_seq in probs_split] - return swig_decoders.ctc_beam_search_decoder_batch( + batch_beam_results = swig_decoders.ctc_beam_search_decoder_batch( probs_split, vocabulary, beam_size, num_processes, cutoff_prob, cutoff_top_n, ext_scoring_func) + batch_beam_results = [ + [(res[0], res[1].decode("utf-8")) for res in beam_results] + for beam_results in batch_beam_results + ] + return batch_beam_results diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index 88b09bee5..e03a8aff0 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -33,7 +33,7 @@ python -u train.py \ --shuffle_method='batch_shuffle_clipped' if [ $? -ne 0 ]; then - echo "Fail to do inference!" + echo "Fail in training!" exit 1 fi From 3f608b3b81d42de45be7403716fb71894b33af5f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 29 Sep 2017 20:12:38 +0800 Subject: [PATCH 234/335] enable the log of gradient clipping in training --- train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 445f3d765..44b2ccbc4 100644 --- a/train.py +++ b/train.py @@ -118,7 +118,9 @@ def train(): def main(): print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) + paddle.init(use_gpu=args.use_gpu, + trainer_count=args.trainer_count, + log_clipping=True) train() From 37a29bf18181160671bb39c9acc954e930f2b447 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sat, 30 Sep 2017 07:21:34 +0000 Subject: [PATCH 235/335] fix the core dump bug of DS2's training in docker --- data_utils/featurizer/audio_featurizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 12f8784a9..f594de7d9 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -141,7 +141,8 @@ class AudioFeaturizer(object): # window weighting, squared Fast Fourier Transform (fft), scaling weighting = np.hanning(window_size)[:, None] fft = np.fft.rfft(windows * weighting, axis=0) - fft = np.absolute(fft)**2 + fft = np.absolute(fft) + fft = fft**2 scale = np.sum(weighting**2) * sample_rate fft[1:-1, :] *= (2.0 / scale) fft[(0, -1), :] /= scale From 64ab19c165d46c9757a7a202a6d25de132c8fcef Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sat, 7 Oct 2017 05:57:05 -0700 Subject: [PATCH 236/335] Add multiprocess version of xmap_reader to speedup training. Add seqbin data parser to adapt to internal 1w data training. 
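The `xmap_readers_mp` wrapper added below keeps the same reader/mapper calling convention as `paddle.reader.xmap_readers`, but runs the mapper in separate processes so per-sample feature extraction is not serialized by the GIL. A toy usage sketch (the reader and mapper are stand-ins; only the call pattern is real):

```
from data_utils.utility import xmap_readers_mp

def toy_reader():                 # stand-in for a manifest reader
    for i in range(100):
        yield i

def toy_mapper(sample):           # stand-in for per-sample feature extraction
    return sample * sample

wrapped_reader = xmap_readers_mp(
    toy_mapper, toy_reader, process_num=4, buffer_size=64, order=True)

for value in wrapped_reader():    # the result is itself a reader creator
    pass                          # consume samples as usual
```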
--- data_utils/audio.py | 42 +++++++++++++++++++ data_utils/data.py | 56 +++++++++++++------------ data_utils/speech.py | 22 ++++++++-- data_utils/utility.py | 98 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 30 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 30e25221c..895a7899c 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -5,6 +5,8 @@ from __future__ import print_function import numpy as np import io +import struct +import re import soundfile import resampy from scipy import signal @@ -114,6 +116,46 @@ class AudioSegment(object): data = sndfile.read(frames=end_frame - start_frame, dtype='float32') return cls(data, sample_rate) + @classmethod + def from_sequence_file(cls, filepath): + """Create audio segment from sequence file. + + :param filepath: Filepath of sequence file. + :type filepath: basestring + :return: Audio segment instance. + :rtype: AudioSegment + """ + # parse filepath + matches = re.match(r"(.+\.seqbin)_(\d+)", filepath) + if matches is None: + raise IOError("File type of %s is not supported" % filepath) + filename = matches.group(1) + fileno = int(matches.group(2)) + + # read headers + f = open(filename, 'rb') + version = f.read(4) + num_utterances = struct.unpack("i", f.read(4))[0] + bytes_per_header = struct.unpack("i", f.read(4))[0] + header_bytes = f.read(bytes_per_header * (num_utterances + 1)) + header = [ + struct.unpack("i", header_bytes[bytes_per_header * i: + bytes_per_header * (i + 1)])[0] + for i in range(num_utterances + 1) + ] + + # read audio bytes + f.seek(header[fileno - 1]) + audio_bytes = f.read(header[fileno] - header[fileno - 1]) + f.close() + + # create audio segment + try: + return cls.from_bytes(audio_bytes) + except Exception as e: + samples = np.frombuffer(audio_bytes, dtype='int16') + return cls(samples=samples, sample_rate=8000) + @classmethod def from_bytes(cls, bytes): """Create audio segment from a byte string containing audio samples. diff --git a/data_utils/data.py b/data_utils/data.py index 7ddf1f339..fca538175 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -7,11 +7,13 @@ from __future__ import print_function import random import tarfile +import re import multiprocessing import numpy as np import paddle.v2 as paddle from threading import local from data_utils.utility import read_manifest +from data_utils.utility import xmap_readers_mp from data_utils.augmentor.augmentation import AugmentationPipeline from data_utils.featurizer.speech_featurizer import SpeechFeaturizer from data_utils.speech import SpeechSegment @@ -100,7 +102,14 @@ class DataGenerator(object): transcription. :rtype: tuple of (2darray, list) """ - speech_segment = SpeechSegment.from_file(filename, transcript) + if filename.startswith('tar:'): + speech_segment = SpeechSegment.from_file( + self._subfile_from_tar(filename), transcript) + elif re.findall(r".seqbin_\d+$", filename): + speech_segment = SpeechSegment.from_sequence_file(filename, + transcript) + else: + speech_segment = SpeechSegment.from_file(filename, transcript) self._augmentation_pipeline.transform_audio(speech_segment) specgram, text_ids = self._speech_featurizer.featurize(speech_segment) specgram = self._normalizer.apply(specgram) @@ -231,27 +240,23 @@ class DataGenerator(object): result[tarinfo.name] = tarinfo return f, result - def _get_file_object(self, file): - """Get file object by file path. + def _subfile_from_tar(self, file): + """Get subfile object from tar. 
- If file startwith tar, it will return a tar file object + It will return a subfile object from tar file and cached tar file info for next reading request. - It will return file directly, if the type of file is not str. """ - if file.startswith('tar:'): - tarpath, filename = file.split(':', 1)[1].split('#', 1) - if 'tar2info' not in self._local_data.__dict__: - self._local_data.tar2info = {} - if 'tar2object' not in self._local_data.__dict__: - self._local_data.tar2object = {} - if tarpath not in self._local_data.tar2info: - object, infoes = self._parse_tar(tarpath) - self._local_data.tar2info[tarpath] = infoes - self._local_data.tar2object[tarpath] = object - return self._local_data.tar2object[tarpath].extractfile( - self._local_data.tar2info[tarpath][filename]) - else: - return open(file, 'r') + tarpath, filename = file.split(':', 1)[1].split('#', 1) + if 'tar2info' not in self._local_data.__dict__: + self._local_data.tar2info = {} + if 'tar2object' not in self._local_data.__dict__: + self._local_data.tar2object = {} + if tarpath not in self._local_data.tar2info: + object, infoes = self._parse_tar(tarpath) + self._local_data.tar2info[tarpath] = infoes + self._local_data.tar2object[tarpath] = object + return self._local_data.tar2object[tarpath].extractfile( + self._local_data.tar2info[tarpath][filename]) def _instance_reader_creator(self, manifest): """ @@ -266,13 +271,12 @@ class DataGenerator(object): for instance in manifest: yield instance - def mapper(instance): - return self.process_utterance( - self._get_file_object(instance["audio_filepath"]), - instance["text"]) - - return paddle.reader.xmap_readers( - mapper, reader, self._num_threads, 1024, order=True) + return xmap_readers_mp( + lambda instance: self.process_utterance(instance["audio_filepath"], instance["text"]), + reader, + self._num_threads, + 4096, + order=True) def _padding_batch(self, batch, padding_to=-1, flatten=False): """ diff --git a/data_utils/speech.py b/data_utils/speech.py index 17d68f315..623b38c24 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -44,12 +44,26 @@ class SpeechSegment(AudioSegment): :type filepath: basestring|file :param transcript: Transcript text for the speech. :type transript: basestring - :return: Audio segment instance. - :rtype: AudioSegment + :return: Speech segment instance. + :rtype: SpeechSegment """ audio = AudioSegment.from_file(filepath) return cls(audio.samples, audio.sample_rate, transcript) + @classmethod + def from_sequence_file(cls, filepath, transcript): + """Create speech segment from sequence file and transcript. + + :param filepath: Filepath of sequence file. + :type filepath: basestring + :param transcript: Transcript text for the speech. + :type transript: basestring + :return: Speech segment instance. + :rtype: SpeechSegment + """ + audio = AudioSegment.from_sequence_file(filepath) + return cls(audio.samples, audio.sample_rate, transcript) + @classmethod def from_bytes(cls, bytes, transcript): """Create speech segment from a byte string and corresponding @@ -59,8 +73,8 @@ class SpeechSegment(AudioSegment): :type bytes: str :param transcript: Transcript text for the speech. :type transript: basestring - :return: Audio segment instance. - :rtype: AudioSegment + :return: Speech segment instance. 
+ :rtype: Speech Segment """ audio = AudioSegment.from_bytes(bytes) return cls(audio.samples, audio.sample_rate, transcript) diff --git a/data_utils/utility.py b/data_utils/utility.py index da7b66ef2..40e212c89 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -7,6 +7,9 @@ import json import codecs import os import tarfile +import time +from Queue import Queue +from multiprocessing import Process, Manager from paddle.v2.dataset.common import md5file @@ -61,3 +64,98 @@ def unpack(filepath, target_dir, rm_tar=False): tar.close() if rm_tar == True: os.remove(filepath) + + +class XmapEndSignal(): + pass + + +def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): + """A multiprocessing pipeline wrapper for the data reader. + + :param mapper: Function to map sample. + :type mapper: callable + :param reader: Given data reader. + :type reader: callable + :param process_num: Number of processes in the pipeline + :type process_num: int + :param buffer_size: Maximal buffer size. + :type buffer_size: int + :param order: Reserve the order of samples from the given reader. + :type order: bool + :return: The wrappered reader + :rtype: callable + """ + end_flag = XmapEndSignal() + + # define a worker to read samples from reader to in_queue + def read_worker(reader, in_queue): + for sample in reader(): + in_queue.put(sample) + in_queue.put(end_flag) + + # define a worker to read samples from reader to in_queue with order flag + def order_read_worker(reader, in_queue): + for order_id, sample in enumerate(reader()): + in_queue.put((order_id, sample)) + in_queue.put(end_flag) + + # define a worker to handle samples from in_queue by mapper and put results to out_queue + def handle_worker(in_queue, out_queue, mapper): + sample = in_queue.get() + while not isinstance(sample, XmapEndSignal): + out_queue.put(mapper(sample)) + sample = in_queue.get() + in_queue.put(end_flag) + out_queue.put(end_flag) + + # define a worker to handle samples from in_queue by mapper and put results to out_queue with order + def order_handle_worker(in_queue, out_queue, mapper, out_order): + ins = in_queue.get() + while not isinstance(ins, XmapEndSignal): + order_id, sample = ins + result = mapper(sample) + while order_id != out_order[0]: + time.sleep(0.001) + out_queue.put(result) + out_order[0] += 1 + ins = in_queue.get() + in_queue.put(end_flag) + out_queue.put(end_flag) + + def xreader(): + # prepare shared memory + manager = Manager() + in_queue = manager.Queue(buffer_size) + out_queue = manager.Queue(buffer_size) + out_order = manager.list([0]) + + # start a read worker in a process + target = order_read_worker if order else read_worker + p = Process(target=target, args=(reader, in_queue)) + p.start() + + # start handle_workers with multiple processes + target = order_handle_worker if order else handle_worker + args = (in_queue, out_queue, mapper, out_order) if order else ( + in_queue, out_queue, mapper) + workers = [ + Process(target=target, args=args) for _ in xrange(process_num) + ] + for w in workers: + w.start() + + # get results + sample = out_queue.get() + while not isinstance(sample, XmapEndSignal): + yield sample + sample = out_queue.get() + finish = 1 + while finish < process_num: + sample = out_queue.get() + if isinstance(sample, XmapEndSignal): + finish += 1 + else: + yield sample + + return xreader From be1fbc68a15b2c7f6dee6f98bea84a4c201a863e Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sat, 7 Oct 2017 06:40:03 -0700 Subject: [PATCH 237/335] Set process daemon property and 
reset default value of num_proc_data arguments. --- data_utils/utility.py | 2 ++ examples/aishell/run_train.sh | 2 +- examples/librispeech/run_train.sh | 2 +- test.py | 2 +- train.py | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/data_utils/utility.py b/data_utils/utility.py index 40e212c89..123348cbc 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -133,6 +133,7 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): # start a read worker in a process target = order_read_worker if order else read_worker p = Process(target=target, args=(reader, in_queue)) + p.daemon = True p.start() # start handle_workers with multiple processes @@ -143,6 +144,7 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): Process(target=target, args=args) for _ in xrange(process_num) ] for w in workers: + w.daemon = True w.start() # get results diff --git a/examples/aishell/run_train.sh b/examples/aishell/run_train.sh index 8e61ec3c2..afb7d2efb 100644 --- a/examples/aishell/run_train.sh +++ b/examples/aishell/run_train.sh @@ -9,7 +9,7 @@ python -u train.py \ --batch_size=64 \ --trainer_count=8 \ --num_passes=50 \ ---num_proc_data=12 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 69251fe0c..073619c2a 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -9,7 +9,7 @@ python -u train.py \ --batch_size=512 \ --trainer_count=8 \ --num_passes=50 \ ---num_proc_data=12 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/test.py b/test.py index 40f0795a1..51c725c5f 100644 --- a/test.py +++ b/test.py @@ -18,7 +18,7 @@ add_arg('batch_size', int, 128, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") -add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.") +add_arg('num_proc_data', int, 4, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") diff --git a/train.py b/train.py index 44b2ccbc4..017cc73f6 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,7 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('batch_size', int, 256, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('num_passes', int, 200, "# of training epochs.") -add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.") +add_arg('num_proc_data', int, 8, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") From 8e5c2eb9697e4b23d9283b6d885239423e5c65d3 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sun, 8 Oct 2017 05:21:44 -0700 Subject: [PATCH 238/335] Update by following reviewer's comments for pull request #355. 
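Given the header layout documented in the docstring below, the offsets can also be walked independently of `AudioSegment`; a standalone sketch that lists the byte span of every utterance in a sequence file (the file name is a placeholder, and `bytes_per_header` is assumed to be 4 as in the parser below):

```
import struct

def seqbin_offsets(path):
    """Return (start, end) byte offsets for each utterance in a .seqbin file."""
    with open(path, 'rb') as f:
        f.read(4)                                  # version field, unused here
        num_utts = struct.unpack("i", f.read(4))[0]
        bytes_per_header = struct.unpack("i", f.read(4))[0]
        raw = f.read(bytes_per_header * (num_utts + 1))
        offsets = [
            struct.unpack(
                "i", raw[i * bytes_per_header:(i + 1) * bytes_per_header])[0]
            for i in range(num_utts + 1)
        ]
    return list(zip(offsets[:-1], offsets[1:]))

# seqbin_offsets('data.seqbin')[3] gives the span read for "data.seqbin_4".
```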
--- data_utils/audio.py | 20 +++++++++++++++++++- data_utils/utility.py | 6 +----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 895a7899c..01c064844 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -118,7 +118,25 @@ class AudioSegment(object): @classmethod def from_sequence_file(cls, filepath): - """Create audio segment from sequence file. + """Create audio segment from sequence file. Sequence file is a binary + file containing a collection of multiple audio files, with several + header bytes in the head indicating the offsets of each audio byte data + chunk. + + The format is: + + 4 bytes (int, version), + 4 bytes (int, num of utterance), + 4 bytes (int, bytes per header), + [bytes_per_header*(num_utterance+1)] bytes (offsets for each audio), + audio_bytes_data_of_1st_utterance, + audio_bytes_data_of_2nd_utterance, + ...... + + Sequence file name must end with ".seqbin". And the filename of the 5th + utterance's audio file in sequence file "xxx.seqbin" must be + "xxx.seqbin_5", with "5" indicating the utterance index within this + sequence file (starting from 1). :param filepath: Filepath of sequence file. :type filepath: basestring diff --git a/data_utils/utility.py b/data_utils/utility.py index 123348cbc..96df2485f 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -148,11 +148,7 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): w.start() # get results - sample = out_queue.get() - while not isinstance(sample, XmapEndSignal): - yield sample - sample = out_queue.get() - finish = 1 + finish = 0 while finish < process_num: sample = out_queue.get() if isinstance(sample, XmapEndSignal): From 2e5e9b8c118bca2cb0abed3777d7b2d10d9843d2 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 11 Oct 2017 04:17:27 -0700 Subject: [PATCH 239/335] Turn on rnn_use_batch of Paddle for accelartion. Improve xmap_reader_mp by adding a flush thread. 
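Context for the new flush thread: items put into a `multiprocessing.Manager().Queue()` travel through a proxy process, so reading it from the training loop is much slower than reading an ordinary in-process queue. The pattern in isolation, with the reader/mapper machinery stripped away (the names and the `None` end marker are illustrative):

```
from multiprocessing import Manager
from threading import Thread
from Queue import Queue  # Python 2 stdlib, as used elsewhere in this repo

manager = Manager()
slow_queue = manager.Queue(64)   # filled by worker processes
fast_queue = Queue(64)           # read by the consumer without proxy overhead

def flush(src, dst):
    """Drain the proxied queue into the local queue until the end marker."""
    while True:
        item = src.get()
        dst.put(item)
        if item is None:         # None marks end-of-stream in this sketch
            break

flusher = Thread(target=flush, args=(slow_queue, fast_queue))
flusher.daemon = True            # don't keep the process alive if readers die
flusher.start()

for i in range(5):               # stand-in for the mapper processes
    slow_queue.put(i)
slow_queue.put(None)

item = fast_queue.get()
while item is not None:
    print(item)
    item = fast_queue.get()
```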
--- data_utils/utility.py | 36 +++++++++++++++++++++++++++--------- infer.py | 4 +++- test.py | 4 +++- tools/tune.py | 4 +++- train.py | 1 + 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/data_utils/utility.py b/data_utils/utility.py index 96df2485f..49eed6d8d 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -9,6 +9,7 @@ import os import tarfile import time from Queue import Queue +from threading import Thread from multiprocessing import Process, Manager from paddle.v2.dataset.common import md5file @@ -100,7 +101,8 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): in_queue.put((order_id, sample)) in_queue.put(end_flag) - # define a worker to handle samples from in_queue by mapper and put results to out_queue + # define a worker to handle samples from in_queue by mapper and put results + # to out_queue def handle_worker(in_queue, out_queue, mapper): sample = in_queue.get() while not isinstance(sample, XmapEndSignal): @@ -109,7 +111,8 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): in_queue.put(end_flag) out_queue.put(end_flag) - # define a worker to handle samples from in_queue by mapper and put results to out_queue with order + # define a worker to handle samples from in_queue by mapper and put results + # to out_queue with order def order_handle_worker(in_queue, out_queue, mapper, out_order): ins = in_queue.get() while not isinstance(ins, XmapEndSignal): @@ -123,6 +126,18 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): in_queue.put(end_flag) out_queue.put(end_flag) + # define a thread worker to flush samples from Manager.Queue to Queue + # for acceleration + def flush_worker(in_queue, out_queue): + finish = 0 + while finish < process_num: + sample = in_queue.get() + if isinstance(sample, XmapEndSignal): + finish += 1 + else: + out_queue.put(sample) + out_queue.put(end_flag) + def xreader(): # prepare shared memory manager = Manager() @@ -147,13 +162,16 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): w.daemon = True w.start() + # start a thread to read data from slow Manager.Queue + flush_queue = Queue(buffer_size) + t = Thread(target=flush_worker, args=(out_queue, flush_queue)) + t.daemon = True + t.start() + # get results - finish = 0 - while finish < process_num: - sample = out_queue.get() - if isinstance(sample, XmapEndSignal): - finish += 1 - else: - yield sample + sample = flush_queue.get() + while not isinstance(sample, XmapEndSignal): + yield sample + sample = flush_queue.get() return xreader diff --git a/infer.py b/infer.py index e635f6d0f..5d9439cf2 100644 --- a/infer.py +++ b/infer.py @@ -116,7 +116,9 @@ def infer(): def main(): print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) + paddle.init(use_gpu=args.use_gpu, + rnn_use_batch=True, + trainer_count=args.trainer_count) infer() diff --git a/test.py b/test.py index 51c725c5f..1fe0fbb7c 100644 --- a/test.py +++ b/test.py @@ -119,7 +119,9 @@ def evaluate(): def main(): print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) + paddle.init(use_gpu=args.use_gpu, + rnn_use_batch=True, + trainer_count=args.trainer_count) evaluate() diff --git a/tools/tune.py b/tools/tune.py index 85c2d7388..83c71e7db 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -217,7 +217,9 @@ def tune(): def main(): print_arguments(args) - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) + 
paddle.init(use_gpu=args.use_gpu, + rnn_use_batch=True, + trainer_count=args.trainer_count) tune() diff --git a/train.py b/train.py index 017cc73f6..a9c715769 100644 --- a/train.py +++ b/train.py @@ -119,6 +119,7 @@ def train(): def main(): print_arguments(args) paddle.init(use_gpu=args.use_gpu, + rnn_use_batch=True, trainer_count=args.trainer_count, log_clipping=True) train() From 758e9887ca55a633a582a1cac1ba6048df78b696 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 11 Oct 2017 22:16:57 +0800 Subject: [PATCH 240/335] Bug fix for librispeech/run_train.sh. --- examples/librispeech/run_train.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 073619c2a..4ca045c2e 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -24,7 +24,7 @@ python -u train.py \ --is_local=True \ --share_rnn_weights=True \ --train_manifest='data/librispeech/manifest.train' \ ---dev_manifest='data/librispeech/manifest.dev' \ +--dev_manifest='data/librispeech/manifest.dev-clean' \ --mean_std_path='data/librispeech/mean_std.npz' \ --vocab_path='data/librispeech/vocab.txt' \ --output_model_dir='./checkpoints/libri' \ From 86811af7a37729cbf5c4cce8bc4693e320e08d55 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 11 Oct 2017 22:36:20 +0800 Subject: [PATCH 241/335] Reset default value of batch_size, nun_proc_data and fix an invalid url for DS2. --- examples/aishell/run_test.sh | 2 +- examples/aishell/run_test_golden.sh | 2 +- examples/aishell/run_train.sh | 2 +- examples/librispeech/run_test.sh | 2 +- examples/librispeech/run_test_golden.sh | 2 +- examples/librispeech/run_train.sh | 4 ++-- examples/librispeech/run_tune.sh | 2 +- examples/tiny/run_test.sh | 2 +- examples/tiny/run_test_golden.sh | 2 +- infer.py | 2 +- models/librispeech/download_model.sh | 2 +- test.py | 4 ++-- tools/tune.py | 5 +++-- train.py | 2 +- 14 files changed, 18 insertions(+), 17 deletions(-) diff --git a/examples/aishell/run_test.sh b/examples/aishell/run_test.sh index 266c73676..feec95cb4 100644 --- a/examples/aishell/run_test.sh +++ b/examples/aishell/run_test.sh @@ -18,7 +18,7 @@ python -u test.py \ --trainer_count=8 \ --beam_size=300 \ --num_proc_bsearch=8 \ ---num_proc_data=4 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/run_test_golden.sh index 50e7fe2c6..387d54f39 100644 --- a/examples/aishell/run_test_golden.sh +++ b/examples/aishell/run_test_golden.sh @@ -27,7 +27,7 @@ python -u test.py \ --trainer_count=8 \ --beam_size=300 \ --num_proc_bsearch=8 \ ---num_proc_data=4 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ diff --git a/examples/aishell/run_train.sh b/examples/aishell/run_train.sh index afb7d2efb..077fabcd8 100644 --- a/examples/aishell/run_train.sh +++ b/examples/aishell/run_train.sh @@ -9,7 +9,7 @@ python -u train.py \ --batch_size=64 \ --trainer_count=8 \ --num_passes=50 \ ---num_proc_data=8 \ +--num_proc_data=16 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index d79a22563..b67d25ed5 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -18,7 +18,7 @@ python -u test.py \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=8 \ ---num_proc_data=4 \ +--num_proc_data=8 \ --num_conv_layers=2 \ 
--num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index 011cdd2ab..3e7e3b4c5 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -27,7 +27,7 @@ python -u test.py \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=8 \ ---num_proc_data=4 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index 073619c2a..ea6a60823 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -6,10 +6,10 @@ cd ../.. > /dev/null # if you wish to resume from an exists model, uncomment --init_model_path CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ python -u train.py \ ---batch_size=512 \ +--batch_size=160 \ --trainer_count=8 \ --num_passes=50 \ ---num_proc_data=8 \ +--num_proc_data=16 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index 78b2972d0..c3695d1cb 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -6,7 +6,7 @@ cd ../.. > /dev/null CUDA_VISIBLE_DEVICES=0,1,2,3 \ python -u tools/tune.py \ --num_batches=-1 \ ---batch_size=256 \ +--batch_size=128 \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=12 \ diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index 690dfe739..27a3fc6f5 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -18,7 +18,7 @@ python -u test.py \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=8 \ ---num_proc_data=4 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 16adf2988..8c00895d5 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -27,7 +27,7 @@ python -u test.py \ --trainer_count=8 \ --beam_size=500 \ --num_proc_bsearch=8 \ ---num_proc_data=4 \ +--num_proc_data=8 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ diff --git a/infer.py b/infer.py index 5d9439cf2..a30d48d6d 100644 --- a/infer.py +++ b/infer.py @@ -17,7 +17,7 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('num_samples', int, 10, "# of samples to infer.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") +add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 0048ff166..9c0ec2783 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -2,7 +2,7 @@ . 
../../utils/utility.sh -URL='http://cloud.dlnel.org/filepub/?uuid=8e3cf742-2ff3-41ce-a49d-f6158cc06a23' +URL='http://cloud.dlnel.org/filepub/?uuid=6020a634-5399-4423-b021-c5ed32680fff' MD5=2ef08f8b608a7c555592161fc14d81a6 TARGET=./librispeech_model.tar.gz diff --git a/test.py b/test.py index 1fe0fbb7c..94c09150c 100644 --- a/test.py +++ b/test.py @@ -17,8 +17,8 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('batch_size', int, 128, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") -add_arg('num_proc_data', int, 4, "# of CPUs for data preprocessing.") +add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.") +add_arg('num_proc_data', int, 8, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") diff --git a/tools/tune.py b/tools/tune.py index 83c71e7db..233ec4ab8 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -27,7 +27,8 @@ add_arg('num_batches', int, -1, "# of batches tuning on. " add_arg('batch_size', int, 256, "# of samples per batch.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('beam_size', int, 500, "Beam search width.") -add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.") +add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.") +add_arg('num_proc_data', int, 8, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") @@ -86,7 +87,7 @@ def tune(): mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=1) + num_threads=args.num_proc_data) audio_data = paddle.layer.data( name="audio_spectrogram", diff --git a/train.py b/train.py index a9c715769..16415713f 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,7 @@ add_arg = functools.partial(add_arguments, argparser=parser) add_arg('batch_size', int, 256, "Minibatch size.") add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).") add_arg('num_passes', int, 200, "# of training epochs.") -add_arg('num_proc_data', int, 8, "# of CPUs for data preprocessing.") +add_arg('num_proc_data', int, 16, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") From e3bb689c0e022345e9833ebacdcba3935bc6b7a0 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 11 Oct 2017 22:48:00 +0800 Subject: [PATCH 242/335] Add document for Mandarin model. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fca2528aa..0fcb73327 100644 --- a/README.md +++ b/README.md @@ -398,7 +398,7 @@ For more information about the DeepSpeech2 training on PaddleCloud, please refer ## Training for Mandarin Language -TODO: to be added +The steps of training, evaluation and inference for Mandarin ASR model is same with English ASR model. We have provided an example for Mandarin data which using Aishell dataset and you can find it in ```examples/aishell```. 
As mentioned above, you can execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, test and inference correspondingly. We have also tuned a setting to get better model performance (not the best), and you can execute ```sh run_infer_golden.sh``` to show some speech-to-text decoding results. ## Trying Live Demo with Your Own Voice From 9b7fc7e903ff408095c17d893cde9e9c9bd9b08d Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 12 Oct 2017 11:38:22 +0800 Subject: [PATCH 243/335] Add doc for Chinese LM. --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fca2528aa..efd45efb6 100644 --- a/README.md +++ b/README.md @@ -232,13 +232,13 @@ Now the preprocessing is done and we get a clean corpus to train the language mo #### Mandarin LM -Different from the English language model, Mandarin language model is character-based where each token is a Chinese character. We use an internal corpus to train the released Mandarin language model. This corpus contains billions of tokens. The preprocessing has tiny difference from English language model and main steps include: +Different from the English language model, Mandarin language model is character-based where each token is a Chinese character. We use internal corpus to train the released Mandarin language models. The corpus contain billions of tokens. The preprocessing has tiny difference from English language model and main steps include: * The beginning and trailing whitespace characters are removed. * English punctuations and Chinese punctuations are removed. * A whitespace character between two tokens is inserted. -Please notice that the released language model only contains Chinese simplified characters. After preprocessing done we can begin to train the language model. The key training arguments are '-o 5 --prune 0 1 2 4 4'. Please refer above section for the meaning of each argument. We also convert the arpa file to binary file using default settings. +Please notice that the released language models only contain Chinese simplified characters. After preprocessing done we can begin to train the language model. The key training arguments for small LM is '-o 5 --prune 0 1 2 4 4' and '-o 5' for large LM. Please refer above section for the meaning of each argument. We also convert the arpa file to binary file using default settings. ### Speech-to-text Inference @@ -459,10 +459,11 @@ Mandarin | [Internal Mandarin Model](to-be-added) | Baidu Mandarin Dataset | 291 #### Language Model Released -Language Model | Training Data | Token-based | Size | Filter Configuraiton -:-------------:| :------------:| :-----: | -----: | -----------------: -[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | To Be Added | Word-based | 8.3 GB | To Be Added -[Mandarin LM](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | To Be Added | Character-based | 2.8 GB | To Be Added +Language Model | Training Data | Token-based | Size | Description +:-------------:| :------------:| :-----: | -----: | :-----------------: +[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | [en.00.deduped.xz](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | pruned with 0 1 1 1 1
<br> about 1.85 billion n-grams <br> 'trie' binary with '-a 22 -q 8 -b 8'
+[Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Small internel data | Character-based | 2.8 GB | pruned with 0 1 2 4 4 <br> about 0.13 billion n-grams <br> 'probing' binary with default settings
+Mandarin LM Large | Large internel data | Character-based | 70.4 GB | no pruning <br> about 3.7 billion n-grams
'probing' binary with default settings ## Experiments and Benchmarks From d78d4fa6ffa1a7978dfe9767fb1c3ec526e58a10 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 12 Oct 2017 17:06:08 +0800 Subject: [PATCH 244/335] Add url for large Mandarin LM. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index efd45efb6..8dd13f921 100644 --- a/README.md +++ b/README.md @@ -463,7 +463,7 @@ Language Model | Training Data | Token-based | Size | Description :-------------:| :------------:| :-----: | -----: | :-----------------: [English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | [en.00.deduped.xz](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | pruned with 0 1 1 1 1
<br> about 1.85 billion n-grams <br> 'trie' binary with '-a 22 -q 8 -b 8'
 [Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Small internel data | Character-based | 2.8 GB | pruned with 0 1 2 4 4 <br> about 0.13 billion n-grams <br> 'probing' binary with default settings
-Mandarin LM Large | Large internel data | Character-based | 70.4 GB | no pruning <br> about 3.7 billion n-grams <br> 'probing' binary with default settings
+[Mandarin LM Large](http://cloud.dlnel.org/filepub/?uuid=245d02bb-cd01-4ebe-b079-b97be864ec37) | Large internel data | Character-based | 70.4 GB | no pruning <br> about 3.7 billion n-grams
'probing' binary with default settings ## Experiments and Benchmarks From 81207201daa3da1ae98195ef61116435448d69fc Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sun, 15 Oct 2017 03:41:20 -0700 Subject: [PATCH 245/335] Fix a bug in running tools/compute_meanstd.py with seqbin data. --- data_utils/audio.py | 7 +++++-- data_utils/data.py | 4 ---- data_utils/speech.py | 14 -------------- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/data_utils/audio.py b/data_utils/audio.py index 01c064844..3fb782951 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -65,8 +65,11 @@ class AudioSegment(object): :return: Audio segment instance. :rtype: AudioSegment """ - samples, sample_rate = soundfile.read(file, dtype='float32') - return cls(samples, sample_rate) + if isinstance(file, basestring) and re.findall(r".seqbin_\d+$", file): + return cls.from_sequence_file(file) + else: + samples, sample_rate = soundfile.read(file, dtype='float32') + return cls(samples, sample_rate) @classmethod def slice_from_file(cls, file, start=None, end=None): diff --git a/data_utils/data.py b/data_utils/data.py index fca538175..71ba2434f 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -7,7 +7,6 @@ from __future__ import print_function import random import tarfile -import re import multiprocessing import numpy as np import paddle.v2 as paddle @@ -105,9 +104,6 @@ class DataGenerator(object): if filename.startswith('tar:'): speech_segment = SpeechSegment.from_file( self._subfile_from_tar(filename), transcript) - elif re.findall(r".seqbin_\d+$", filename): - speech_segment = SpeechSegment.from_sequence_file(filename, - transcript) else: speech_segment = SpeechSegment.from_file(filename, transcript) self._augmentation_pipeline.transform_audio(speech_segment) diff --git a/data_utils/speech.py b/data_utils/speech.py index 623b38c24..0cea88730 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -50,20 +50,6 @@ class SpeechSegment(AudioSegment): audio = AudioSegment.from_file(filepath) return cls(audio.samples, audio.sample_rate, transcript) - @classmethod - def from_sequence_file(cls, filepath, transcript): - """Create speech segment from sequence file and transcript. - - :param filepath: Filepath of sequence file. - :type filepath: basestring - :param transcript: Transcript text for the speech. - :type transript: basestring - :return: Speech segment instance. 
- :rtype: SpeechSegment - """ - audio = AudioSegment.from_sequence_file(filepath) - return cls(audio.samples, audio.sample_rate, transcript) - @classmethod def from_bytes(cls, bytes, transcript): """Create speech segment from a byte string and corresponding From ad08dd963ae03950787f75b33d43df0da1fa9d82 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 16 Oct 2017 19:14:02 +0800 Subject: [PATCH 246/335] supply param cutoff_top_n in several scripts of DS2 --- examples/librispeech/run_test.sh | 1 + examples/mandarin/run_demo_server.sh | 1 + examples/tiny/run_infer.sh | 1 + examples/tiny/run_infer_golden.sh | 1 + examples/tiny/run_test.sh | 1 + examples/tiny/run_test_golden.sh | 1 + 6 files changed, 6 insertions(+) diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index b67d25ed5..0a76704d7 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -25,6 +25,7 @@ python -u test.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/mandarin/run_demo_server.sh b/examples/mandarin/run_demo_server.sh index d98a99d01..ca28a98d5 100644 --- a/examples/mandarin/run_demo_server.sh +++ b/examples/mandarin/run_demo_server.sh @@ -32,6 +32,7 @@ python -u deploy/demo_server.py \ --alpha=0.36 \ --beta=0.25 \ --cutoff_prob=0.99 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index cbd049a6b..3a345f2ff 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -24,6 +24,7 @@ python -u infer.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh index 6567542be..72a8be064 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/tiny/run_infer_golden.sh @@ -33,6 +33,7 @@ python -u infer.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index 27a3fc6f5..a58f5d100 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -25,6 +25,7 @@ python -u test.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 8c00895d5..8d3d25c5c 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -34,6 +34,7 @@ python -u test.py \ --alpha=2.15 \ --beta=0.35 \ --cutoff_prob=1.0 \ +--cutoff_top_n=40 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ From f23282dc36dfc98a0752750cb335d9bbd4b777a3 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Wed, 18 Oct 2017 15:17:56 +0800 Subject: [PATCH 247/335] fix bug in saving model (invoke the wrong function) --- model_utils/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model_utils/model.py b/model_utils/model.py index 123eed9b4..5a0d8890d 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -111,7 +111,7 @@ class DeepSpeech2Model(object): output_model_path = os.path.join(output_model_dir, "params.latest.tar.gz") with gzip.open(output_model_path, 'w') as f: - self._parameters.to_tar(f) + trainer.save_parameter_to_tar(f) 
print("\nPass: %d, Batch: %d, TrainCost: %f" % (event.pass_id, event.batch_id + 1, cost_sum / cost_counter)) @@ -136,7 +136,7 @@ class DeepSpeech2Model(object): output_model_path = os.path.join( output_model_dir, "params.pass-%d.tar.gz" % event.pass_id) with gzip.open(output_model_path, 'w') as f: - self._parameters.to_tar(f) + trainer.save_parameter_to_tar(f) # run train trainer.train( From 39dbcb4dfb2a6e09bb2418d16445cd45631f8d24 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 23 Oct 2017 17:47:22 +0800 Subject: [PATCH 248/335] Give option to disable converting from transcription text to ids. --- data_utils/data.py | 13 ++++++++++--- data_utils/featurizer/speech_featurizer.py | 8 +++++--- deploy/demo_server.py | 3 ++- infer.py | 6 +++--- test.py | 6 +++--- tools/tune.py | 6 +++--- 6 files changed, 26 insertions(+), 16 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 71ba2434f..edd4047ef 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -55,6 +55,10 @@ class DataGenerator(object): :type num_threads: int :param random_seed: Random seed. :type random_seed: int + :param keep_transcription_text: If set to True, transcription text will + be passed forward directly without + converting to index sequence. + :type keep_transcription_text: bool """ def __init__(self, @@ -69,7 +73,8 @@ class DataGenerator(object): specgram_type='linear', use_dB_normalization=True, num_threads=multiprocessing.cpu_count() // 2, - random_seed=0): + random_seed=0, + keep_transcription_text=False): self._max_duration = max_duration self._min_duration = min_duration self._normalizer = FeatureNormalizer(mean_std_filepath) @@ -84,6 +89,7 @@ class DataGenerator(object): use_dB_normalization=use_dB_normalization) self._num_threads = num_threads self._rng = random.Random(random_seed) + self._keep_transcription_text = keep_transcription_text self._epoch = 0 # for caching tar files info self._local_data = local() @@ -107,9 +113,10 @@ class DataGenerator(object): else: speech_segment = SpeechSegment.from_file(filename, transcript) self._augmentation_pipeline.transform_audio(speech_segment) - specgram, text_ids = self._speech_featurizer.featurize(speech_segment) + specgram, transcript_part = self._speech_featurizer.featurize( + speech_segment, self._keep_transcription_text) specgram = self._normalizer.apply(specgram) - return specgram, text_ids + return specgram, transcript_part def batch_reader_creator(self, manifest_path, diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py index a947588db..4555dc31d 100644 --- a/data_utils/featurizer/speech_featurizer.py +++ b/data_utils/featurizer/speech_featurizer.py @@ -60,12 +60,12 @@ class SpeechFeaturizer(object): target_dB=target_dB) self._text_featurizer = TextFeaturizer(vocab_filepath) - def featurize(self, speech_segment): + def featurize(self, speech_segment, keep_transcription_text): """Extract features for speech segment. 1. For audio parts, extract the audio features. - 2. For transcript parts, convert text string to a list of token indices - in char-level. + 2. For transcript parts, keep the original text or convert text string + to a list of token indices in char-level. :param audio_segment: Speech segment to extract features from. 
:type audio_segment: SpeechSegment @@ -74,6 +74,8 @@ class SpeechFeaturizer(object): :rtype: tuple """ audio_feature = self._audio_featurizer.featurize(speech_segment) + if keep_transcription_text: + return audio_feature, speech_segment.transcript text_ids = self._text_featurizer.featurize(speech_segment.transcript) return audio_feature, text_ids diff --git a/deploy/demo_server.py b/deploy/demo_server.py index b007c751e..3e81c0c5b 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -146,7 +146,8 @@ def start_server(): mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=1) + num_threads=1, + keep_transcription_text=True) # prepare ASR model ds2_model = DeepSpeech2Model( vocab_size=data_generator.vocab_size, diff --git a/infer.py b/infer.py index a30d48d6d..74524602a 100644 --- a/infer.py +++ b/infer.py @@ -68,7 +68,8 @@ def infer(): mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=1) + num_threads=1, + keep_transcription_text=True) batch_reader = data_generator.batch_reader_creator( manifest_path=args.infer_manifest, batch_size=args.num_samples, @@ -103,8 +104,7 @@ def infer(): error_rate_func = cer if args.error_rate_type == 'cer' else wer target_transcripts = [ - ''.join([data_generator.vocab_list[token] for token in transcript]) - for _, transcript in infer_data + transcript for _, transcript in infer_data ] for target, result in zip(target_transcripts, result_transcripts): print("\nTarget Transcription: %s\nOutput Transcription: %s" % diff --git a/test.py b/test.py index 94c09150c..5466f960b 100644 --- a/test.py +++ b/test.py @@ -69,7 +69,8 @@ def evaluate(): mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=args.num_proc_data) + num_threads=args.num_proc_data, + keep_transcription_text=True) batch_reader = data_generator.batch_reader_creator( manifest_path=args.test_manifest, batch_size=args.batch_size, @@ -104,8 +105,7 @@ def evaluate(): language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) target_transcripts = [ - ''.join([data_generator.vocab_list[token] for token in transcript]) - for _, transcript in infer_data + transcript for _, transcript in infer_data ] for target, result in zip(target_transcripts, result_transcripts): error_sum += error_rate_func(target, result) diff --git a/tools/tune.py b/tools/tune.py index 233ec4ab8..99ffb5f5d 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -87,7 +87,8 @@ def tune(): mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, - num_threads=args.num_proc_data) + num_threads=args.num_proc_data, + keep_transcription_text=True) audio_data = paddle.layer.data( name="audio_spectrogram", @@ -164,8 +165,7 @@ def tune(): ] target_transcripts = [ - ''.join([data_generator.vocab_list[token] for token in transcript]) - for _, transcript in infer_data + transcript for _, transcript in infer_data ] num_ins += len(target_transcripts) From a0843941281f833010157f8f8680fe7a1a8fc2dd Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 3 Nov 2017 14:52:10 +0800 Subject: [PATCH 249/335] Add doc and adjust some codes. 
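A condensed sketch of the behaviour this option introduces (the featurizer method is rewritten here as a free function for brevity; class wiring and argument validation are omitted):

```python
# Sketch only: mirrors the keep_transcription_text branch added to
# SpeechFeaturizer.featurize() in the previous patch.
def featurize(audio_featurizer, text_featurizer, speech_segment,
              keep_transcription_text):
    audio_feature = audio_featurizer.featurize(speech_segment)
    if keep_transcription_text:
        # inference/evaluation path: keep the raw transcript so WER/CER
        # can be computed against it directly
        return audio_feature, speech_segment.transcript
    # training path: CTC needs the transcript as a token-id sequence
    text_ids = text_featurizer.featurize(speech_segment.transcript)
    return audio_feature, text_ids
```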
--- data_utils/data.py | 4 ++-- infer.py | 4 +--- test.py | 4 +--- tools/tune.py | 4 +--- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index edd4047ef..70ee6fbad 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -103,8 +103,8 @@ class DataGenerator(object): :type filename: basestring | file :param transcript: Transcription text. :type transcript: basestring - :return: Tuple of audio feature tensor and list of token ids for - transcription. + :return: Tuple of audio feature tensor and data of transcription part, + where transcription part could be token ids or text. :rtype: tuple of (2darray, list) """ if filename.startswith('tar:'): diff --git a/infer.py b/infer.py index 74524602a..9ac3e632e 100644 --- a/infer.py +++ b/infer.py @@ -103,9 +103,7 @@ def infer(): num_processes=args.num_proc_bsearch) error_rate_func = cer if args.error_rate_type == 'cer' else wer - target_transcripts = [ - transcript for _, transcript in infer_data - ] + target_transcripts = [transcript for _, transcript in infer_data] for target, result in zip(target_transcripts, result_transcripts): print("\nTarget Transcription: %s\nOutput Transcription: %s" % (target, result)) diff --git a/test.py b/test.py index 5466f960b..63fc4f65c 100644 --- a/test.py +++ b/test.py @@ -104,9 +104,7 @@ def evaluate(): vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) - target_transcripts = [ - transcript for _, transcript in infer_data - ] + target_transcripts = [transcript for _, transcript in infer_data] for target, result in zip(target_transcripts, result_transcripts): error_sum += error_rate_func(target, result) num_ins += 1 diff --git a/tools/tune.py b/tools/tune.py index 99ffb5f5d..966029a82 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -164,9 +164,7 @@ def tune(): for i in xrange(len(infer_data)) ] - target_transcripts = [ - transcript for _, transcript in infer_data - ] + target_transcripts = [transcript for _, transcript in infer_data] num_ins += len(target_transcripts) # grid search From e8a5a17b1dee1853668b9cd6dcf22facc18c74ab Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 3 Nov 2017 15:09:03 +0800 Subject: [PATCH 250/335] Refine doc. --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8dd13f921..c9a4e8d55 100644 --- a/README.md +++ b/README.md @@ -459,11 +459,11 @@ Mandarin | [Internal Mandarin Model](to-be-added) | Baidu Mandarin Dataset | 291 #### Language Model Released -Language Model | Training Data | Token-based | Size | Description -:-------------:| :------------:| :-----: | -----: | :-----------------: -[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | [en.00.deduped.xz](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | pruned with 0 1 1 1 1
about 1.85 billion n-grams<br/> 'trie' binary with '-a 22 -q 8 -b 8'
-[Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Small internel data | Character-based | 2.8 GB | pruned with 0 1 2 4 4<br/> about 0.13 billion n-grams<br/> 'probing' binary with default settings
-[Mandarin LM Large](http://cloud.dlnel.org/filepub/?uuid=245d02bb-cd01-4ebe-b079-b97be864ec37) | Large internel data | Character-based | 70.4 GB | no pruning<br/> about 3.7 billion n-grams<br/> 'probing' binary with default settings
+Language Model | Training Data | Token-based | Size | Descriptions
+:-------------:| :------------:| :-----: | -----: | :-----------------
+[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;<br/> About 1.85 billion n-grams;<br/> 'trie' binary with '-a 22 -q 8 -b 8'
+[Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;<br/> About 0.13 billion n-grams;<br/> 'probing' binary with default settings
+[Mandarin LM Large](http://cloud.dlnel.org/filepub/?uuid=245d02bb-cd01-4ebe-b079-b97be864ec37) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;<br/> About 3.7 billion n-grams;<br/>
'probing' binary with default settings ## Experiments and Benchmarks From 1f6a18e8e8e28be7c02ca315187d3d6b99b8f045 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 3 Nov 2017 15:50:38 +0800 Subject: [PATCH 251/335] Refine doc. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0fcb73327..622db5077 100644 --- a/README.md +++ b/README.md @@ -398,7 +398,8 @@ For more information about the DeepSpeech2 training on PaddleCloud, please refer ## Training for Mandarin Language -The steps of training, evaluation and inference for Mandarin ASR model is same with English ASR model. We have provided an example for Mandarin data which using Aishell dataset and you can find it in ```examples/aishell```. As mentioned above, you can execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, test and inference correspondingly. We have also tuned a setting to get better model performance (not the best), and you can execute ```sh run_infer_golden.sh``` to show some speech-to-text decoding results. +Before training model for Mandarin Language, mean stddev file and vocabulary file are also required. For mean stddev file, you can run ```tools/compute_mean_std.py``` to generate as above. However, the Mandarin vocabulary contains much more tokens than English vocabulary, but you can still run ```tools/build_vocab.py``` to generate it. The steps of training, evaluation and inference for Mandarin ASR model is same to English ASR model. Notice that, after training a model please run ```tools/tune.py``` to find an optimal setting for Language Model. +We have provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, test and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. ## Trying Live Demo with Your Own Voice From 3e048a3c9a4c0f9497efcbe15eaf55e09f368957 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 3 Nov 2017 17:42:22 +0800 Subject: [PATCH 252/335] Add the doc about docker running for DS2 --- README.md | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fca2528aa..3218bb0a8 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,13 @@ *DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient and scalable implementation, including training, inference & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. 
## Table of Contents -- [Prerequisites](#prerequisites) - [Installation](#installation) - [Getting Started](#getting-started) - [Data Preparation](#data-preparation) - [Training a Model](#training-a-model) - [Data Augmentation Pipeline](#data-augmentation-pipeline) - [Inference and Evaluation](#inference-and-evaluation) +- [Running in Docker Container](#running-in-docker-container) - [Distributed Cloud Training](#distributed-cloud-training) - [Hyper-parameters Tuning](#hyper-parameters-tuning) - [Training for Mandarin Language](#training-for-mandarin-language) @@ -18,13 +18,17 @@ - [Experiments and Benchmarks](#experiments-and-benchmarks) - [Questions and Help](#questions-and-help) -## Prerequisites -- Python 2.7 only supported -- PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation)) + ## Installation -Please make sure the above [prerequisites](#prerequisites) have been satisfied before moving on. +To avoid the trouble of environment setup, [running in docker container](#Running-in-Docker-Container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. + +### Prerequisites +- Python 2.7 only supported +- PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation)) + +### Setup ```bash git clone https://github.com/PaddlePaddle/models.git @@ -331,7 +335,30 @@ python tune.py --help ``` or refer to `example/librispeech/run_tune.sh`. +## Running in Docker Container + +Docker is an open tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed, including the pre-built PaddlePaddle, CTC decoders, and other necessary Python and third-party packages. This Docker image requires the support of NVIDIA GPU, so please make sure its availiability and the [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed. + +Take several steps to launch the Docker image: +- Download the Docker image + +```bash +nvidia-docker pull paddlepaddle/models:deep-speech-2 +``` + +- Clone this repository + +``` +git clone https://github.com/PaddlePaddle/models.git +``` + +- Run the Docker image + +```bash +sudo nvidia-docker run -it -v $(pwd)/models:/models paddlepaddle/models:deep-speech-2 /bin/bash +``` +Now go back and start from the [Getting Started](#getting-started) section, you can execute training, inference and hyper-parameters tuning similary in the Docker container. ## Distributed Cloud Training From 6f90a33f1f02f917035ba860aa22201424c3dc18 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 3 Nov 2017 18:04:33 +0800 Subject: [PATCH 253/335] Update the doc about docker running for DS2 --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3218bb0a8..5507a17b2 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ ## Installation -To avoid the trouble of environment setup, [running in docker container](#Running-in-Docker-Container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. +To avoid the trouble of environment setup, [running in docker container](#running-in-docker-container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. 
### Prerequisites - Python 2.7 only supported @@ -337,7 +337,7 @@ or refer to `example/librispeech/run_tune.sh`. ## Running in Docker Container -Docker is an open tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed, including the pre-built PaddlePaddle, CTC decoders, and other necessary Python and third-party packages. This Docker image requires the support of NVIDIA GPU, so please make sure its availiability and the [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed. +Docker is an open source tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed, including the pre-built PaddlePaddle, CTC decoders, and other necessary Python and third-party packages. This Docker image requires the support of NVIDIA GPU, so please make sure its availiability and the [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed. Take several steps to launch the Docker image: @@ -358,7 +358,7 @@ git clone https://github.com/PaddlePaddle/models.git ```bash sudo nvidia-docker run -it -v $(pwd)/models:/models paddlepaddle/models:deep-speech-2 /bin/bash ``` -Now go back and start from the [Getting Started](#getting-started) section, you can execute training, inference and hyper-parameters tuning similary in the Docker container. +Now go back and start from the [Getting Started](#getting-started) section, you can execute training, inference and hyper-parameters tuning similarly in the Docker container. ## Distributed Cloud Training From 963b60d5edacb5b14abe43b663977973c9dd406d Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 3 Nov 2017 22:04:23 +0800 Subject: [PATCH 254/335] Refine doc for Mandarin training. --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 622db5077..d33522425 100644 --- a/README.md +++ b/README.md @@ -398,8 +398,7 @@ For more information about the DeepSpeech2 training on PaddleCloud, please refer ## Training for Mandarin Language -Before training model for Mandarin Language, mean stddev file and vocabulary file are also required. For mean stddev file, you can run ```tools/compute_mean_std.py``` to generate as above. However, the Mandarin vocabulary contains much more tokens than English vocabulary, but you can still run ```tools/build_vocab.py``` to generate it. The steps of training, evaluation and inference for Mandarin ASR model is same to English ASR model. Notice that, after training a model please run ```tools/tune.py``` to find an optimal setting for Language Model. -We have provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, test and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. +The key steps of training for Mandarin Language are same to that of English Language and we have also provided an example for Mandarin training with Aishell in ```examples/aishell```. 
As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, test and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, different from English LM, the Mandarin LM is character based and please run ```tools/tune.py``` to find an optimal setting. ## Trying Live Demo with Your Own Voice From 046f6ca994f2afa5fe5a23fd3400becf48b9e3f4 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 3 Nov 2017 22:46:25 +0800 Subject: [PATCH 255/335] Refine doc. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d33522425..84d9754d4 100644 --- a/README.md +++ b/README.md @@ -398,7 +398,7 @@ For more information about the DeepSpeech2 training on PaddleCloud, please refer ## Training for Mandarin Language -The key steps of training for Mandarin Language are same to that of English Language and we have also provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, test and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, different from English LM, the Mandarin LM is character based and please run ```tools/tune.py``` to find an optimal setting. +The key steps of training for Mandarin language are same to that of English language and we have also provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, testing and inference correspondingly. We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, different from English LM, the Mandarin LM is character-based and please run ```tools/tune.py``` to find an optimal setting. ## Trying Live Demo with Your Own Voice From 84155d15483ca152234902a4110f8839ffae1aff Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 6 Nov 2017 14:56:02 +0800 Subject: [PATCH 256/335] Update DS2 benchmark results. 
--- README.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index fca2528aa..a5910299f 100644 --- a/README.md +++ b/README.md @@ -450,12 +450,12 @@ python deploy/demo_client.py --help #### Speech Model Released -Language | Model Name | Training Data | Training Hours +Language | Model Name | Training Data | Hours of Speech :-----------: | :------------: | :----------: | -------: English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [Internal English Model](to-be-added) | Baidu English Dataset | 8628 h +English | [BaiduEng8k Model](to-be-added) | Baidu Internal English Dataset | 8628 h Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | [Internal Mandarin Model](to-be-added) | Baidu Mandarin Dataset | 2917 h +Mandarin | [BaiduChi1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h #### Language Model Released @@ -466,23 +466,23 @@ Language Model | Training Data | Token-based | Size | Filter Configuraiton ## Experiments and Benchmarks -#### English Model Evaluation (Word Error Rate) +#### Benchmark Results for English Models (Word Error Rate) -Test Set | LibriSpeech Model | Internal English Model -:---------------------: | ---------------: | -------------------: -LibriSpeech-Test-Clean | 7.96 | X.X -LibriSpeech-Test-Other | 23.87 | X.X -VoxForge-Test | X.X | X.X -Baidu-English-Test | X.X | X.X +Test Set | LibriSpeech Model | BaiduEng8K Model +:--------------------- | ---------------: | -------------------: +LibriSpeech Test-Clean | 7.96 | 6.85 +LibriSpeech Test-Other | 23.87 | 17.16 +VoxForge American-Canadian |- |   8.10 +VoxForge Commonwealth | - | 17.35 +VoxForge European | - | 21.47 +VoxForge Indian | - | 30.39 +Baidu Internal Testset  |   - |   9.56 -(Beam size=2000) +#### Benchmark Results for Mandarin Model (Character Error Rate) -#### Mandarin Model Evaluation (Character Error Rate) - -Test Set | Aishell Model | Internal Mandarin Model -:---------------------: | :---------------: | :-------------------: -Aishell-Test | X.X | X.X -Baidu-Mandarin-Test | X.X | X.X +Test Set | Aishell Model | BaiduChi1.2k Model +:--------------------- | ---------------: | -------------------: +Baidu Internal Testset | - | 15.49 #### Acceleration with Multi-GPUs From 42ef8b3be392ce8c5bfefd2d1e78ab994d556b1e Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 6 Nov 2017 15:08:47 +0800 Subject: [PATCH 257/335] Rename: Eng --> EN, Chi --> CN --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a5910299f..e79cbfab2 100644 --- a/README.md +++ b/README.md @@ -453,9 +453,9 @@ python deploy/demo_client.py --help Language | Model Name | Training Data | Hours of Speech :-----------: | :------------: | :----------: | -------: English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [BaiduEng8k Model](to-be-added) | Baidu Internal English Dataset | 8628 h +English | [BaiduEN8k Model](to-be-added) | Baidu Internal English Dataset | 8628 h Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | 
[BaiduChi1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h +Mandarin | [BaiduCN1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h #### Language Model Released @@ -468,7 +468,7 @@ Language Model | Training Data | Token-based | Size | Filter Configuraiton #### Benchmark Results for English Models (Word Error Rate) -Test Set | LibriSpeech Model | BaiduEng8K Model +Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: LibriSpeech Test-Clean | 7.96 | 6.85 LibriSpeech Test-Other | 23.87 | 17.16 @@ -480,7 +480,7 @@ Baidu Internal Testset  |   - |   9.56 #### Benchmark Results for Mandarin Model (Character Error Rate) -Test Set | Aishell Model | BaiduChi1.2k Model +Test Set | Aishell Model | BaiduCN1.2k Model :--------------------- | ---------------: | -------------------: Baidu Internal Testset | - | 15.49 From d58dd5fe72115b0c2c609f4a35814978a9825d84 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Mon, 6 Nov 2017 15:38:51 +0800 Subject: [PATCH 258/335] Release BaiduEn8k model and prepare demo example. --- .../run_demo_client.sh | 0 .../run_english_demo_server.sh} | 20 +++++++++---------- models/baidu_en8k/download_model.sh | 19 ++++++++++++++++++ 3 files changed, 29 insertions(+), 10 deletions(-) rename examples/{mandarin => deploy_demo}/run_demo_client.sh (100%) rename examples/{mandarin/run_demo_server.sh => deploy_demo/run_english_demo_server.sh} (73%) create mode 100644 models/baidu_en8k/download_model.sh diff --git a/examples/mandarin/run_demo_client.sh b/examples/deploy_demo/run_demo_client.sh similarity index 100% rename from examples/mandarin/run_demo_client.sh rename to examples/deploy_demo/run_demo_client.sh diff --git a/examples/mandarin/run_demo_server.sh b/examples/deploy_demo/run_english_demo_server.sh similarity index 73% rename from examples/mandarin/run_demo_server.sh rename to examples/deploy_demo/run_english_demo_server.sh index ca28a98d5..67532770c 100644 --- a/examples/mandarin/run_demo_server.sh +++ b/examples/deploy_demo/run_english_demo_server.sh @@ -13,7 +13,7 @@ cd - > /dev/null # download well-trained model -cd models/librispeech > /dev/null +cd models/baidu_en8k > /dev/null sh download_model.sh if [ $? -ne 0 ]; then exit 1 @@ -28,19 +28,19 @@ python -u deploy/demo_server.py \ --host_port=8086 \ --num_conv_layers=2 \ --num_rnn_layers=3 \ ---rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--rnn_layer_size=1024 \ +--alpha=1.15 \ +--beta=0.15 \ +--cutoff_prob=1.0 \ --cutoff_top_n=40 \ ---use_gru=False \ +--use_gru=True \ --use_gpu=True \ ---share_rnn_weights=True \ +--share_rnn_weights=False \ --speech_save_dir='demo_cache' \ --warmup_manifest='data/tiny/manifest.test-clean' \ ---mean_std_path='models/librispeech/mean_std.npz' \ ---vocab_path='models/librispeech/vocab.txt' \ ---model_path='models/librispeech/params.tar.gz' \ +--mean_std_path='models/baidu_en8k/mean_std.npz' \ +--vocab_path='models/baidu_en8k/vocab.txt' \ +--model_path='models/baidu_en8k/params.tar.gz' \ --lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --specgram_type='linear' diff --git a/models/baidu_en8k/download_model.sh b/models/baidu_en8k/download_model.sh new file mode 100644 index 000000000..7d92fd52e --- /dev/null +++ b/models/baidu_en8k/download_model.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash + +. 
../../utils/utility.sh + +URL='To-be-added' +MD5=a19d40cb3b558eb696c44d883f32cfda +TARGET=./baidu_en8k_model.tar.gz + + +echo "Download BaiduEn8k model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download BaiduEn8k model!" + exit 1 +fi +tar -zxvf $TARGET + + +exit 0 From f8da5127fe3eb37d53004fcab2feaea339329a65 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 6 Nov 2017 15:49:17 +0800 Subject: [PATCH 259/335] Update benchmark results for LibriSpeech model --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 69fa13944..0466a5329 100644 --- a/README.md +++ b/README.md @@ -498,13 +498,13 @@ Language Model | Training Data | Token-based | Size | Descriptions Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: -LibriSpeech Test-Clean | 7.96 | 6.85 -LibriSpeech Test-Other | 23.87 | 17.16 -VoxForge American-Canadian |- |   8.10 -VoxForge Commonwealth | - | 17.35 -VoxForge European | - | 21.47 -VoxForge Indian | - | 30.39 -Baidu Internal Testset  |   - |   9.56 +LibriSpeech Test-Clean | 8.06 | 6.85 +LibriSpeech Test-Other | 24.25 | 17.16 +VoxForge American-Canadian | 13.48 |   8.10 +VoxForge Commonwealth | 22.37 | 17.35 +VoxForge European | 32.64 | 21.47 +VoxForge Indian | 58.48 | 30.39 +Baidu Internal Testset  |   48.93 |   9.56 #### Benchmark Results for Mandarin Model (Character Error Rate) From 921c6d0cc1d9f2595edd710db3a7770e7d392988 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 6 Nov 2017 23:20:20 +0800 Subject: [PATCH 260/335] Add the scoring of last word/char of prefixes in CTC beam search decoder --- decoders/swig/ctc_beam_search_decoder.cpp | 28 +++++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/decoders/swig/ctc_beam_search_decoder.cpp b/decoders/swig/ctc_beam_search_decoder.cpp index 624784b05..0f33e8573 100644 --- a/decoders/swig/ctc_beam_search_decoder.cpp +++ b/decoders/swig/ctc_beam_search_decoder.cpp @@ -110,17 +110,17 @@ std::vector> ctc_beam_search_decoder( // language model scoring if (ext_scorer != nullptr && (c == space_id || ext_scorer->is_character_based())) { - PathTrie *prefix_toscore = nullptr; + PathTrie *prefix_to_score = nullptr; // skip scoring the space if (ext_scorer->is_character_based()) { - prefix_toscore = prefix_new; + prefix_to_score = prefix_new; } else { - prefix_toscore = prefix; + prefix_to_score = prefix; } - double score = 0.0; + float score = 0.0; std::vector ngram; - ngram = ext_scorer->make_ngram(prefix_toscore); + ngram = ext_scorer->make_ngram(prefix_to_score); score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; log_p += score; log_p += ext_scorer->beta; @@ -131,6 +131,7 @@ std::vector> ctc_beam_search_decoder( } // end of loop over prefix } // end of loop over vocabulary + prefixes.clear(); // update log probs root.iterate_to_vec(prefixes); @@ -147,6 +148,23 @@ std::vector> ctc_beam_search_decoder( } } // end of loop over time + // score the last word/character of each prefix + if (ext_scorer != nullptr) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { + auto prefix = prefixes[i]; + if (prefix->character != space_id && !prefix->is_empty()) { + float score = 0.0; + std::vector ngram = ext_scorer->make_ngram(prefix); + score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; + score += ext_scorer->beta; + prefix->score += score; + } + } + } + + size_t num_prefixes = std::min(prefixes.size(), beam_size); + 
std::sort(prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare); + // compute aproximate ctc score as the return score, without affecting the // return order of decoding result. To delete when decoder gets stable. for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { From fcd614970458f1814d80d95a9899151328c8dce5 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 6 Nov 2017 23:46:44 +0800 Subject: [PATCH 261/335] Update benchmark results for BaiduEN8K model due to #427 --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0466a5329..0caa617eb 100644 --- a/README.md +++ b/README.md @@ -498,13 +498,13 @@ Language Model | Training Data | Token-based | Size | Descriptions Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: -LibriSpeech Test-Clean | 8.06 | 6.85 -LibriSpeech Test-Other | 24.25 | 17.16 -VoxForge American-Canadian | 13.48 |   8.10 -VoxForge Commonwealth | 22.37 | 17.35 -VoxForge European | 32.64 | 21.47 -VoxForge Indian | 58.48 | 30.39 -Baidu Internal Testset  |   48.93 |   9.56 +LibriSpeech Test-Clean | 8.06 | 6.63 +LibriSpeech Test-Other | 24.25 | 16.59 +VoxForge American-Canadian | 13.48 |   7.46 +VoxForge Commonwealth | 22.37 | 16.23 +VoxForge European | 32.64 | 20.47 +VoxForge Indian | 58.48 | 28.15 +Baidu Internal Testset  |   48.93 |   8.92 #### Benchmark Results for Mandarin Model (Character Error Rate) From 4b20a7029c24a9645dea56682d527b8137150cd2 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 7 Nov 2017 17:10:15 +0800 Subject: [PATCH 262/335] skip scoring the end when using character-based scorer --- decoders/swig/ctc_beam_search_decoder.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/decoders/swig/ctc_beam_search_decoder.cpp b/decoders/swig/ctc_beam_search_decoder.cpp index 0f33e8573..4a63af26a 100644 --- a/decoders/swig/ctc_beam_search_decoder.cpp +++ b/decoders/swig/ctc_beam_search_decoder.cpp @@ -148,11 +148,11 @@ std::vector> ctc_beam_search_decoder( } } // end of loop over time - // score the last word/character of each prefix - if (ext_scorer != nullptr) { + // score the last word of each prefix that doesn't end with space + if (ext_scorer != nullptr && !ext_scorer->is_character_based()) { for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { auto prefix = prefixes[i]; - if (prefix->character != space_id && !prefix->is_empty()) { + if (!prefix->is_empty() && prefix->character != space_id) { float score = 0.0; std::vector ngram = ext_scorer->make_ngram(prefix); score = ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha; From a0d1146be70b8a9dfb9c16e1744a2022aa05854d Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 8 Nov 2017 14:46:12 +0800 Subject: [PATCH 263/335] Update benchmark results for LibriSpeech model due to #427 --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0caa617eb..543af0ad1 100644 --- a/README.md +++ b/README.md @@ -498,13 +498,13 @@ Language Model | Training Data | Token-based | Size | Descriptions Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: -LibriSpeech Test-Clean | 8.06 | 6.63 -LibriSpeech Test-Other | 24.25 | 16.59 -VoxForge American-Canadian | 13.48 |   7.46 -VoxForge Commonwealth | 22.37 | 16.23 -VoxForge European | 32.64 | 20.47 -VoxForge Indian | 58.48 | 28.15 -Baidu Internal Testset  |   48.93 |   
8.92 +LibriSpeech Test-Clean | 7.77 | 6.63 +LibriSpeech Test-Other | 23.25 | 16.59 +VoxForge American-Canadian | 12.52 |   7.46 +VoxForge Commonwealth | 21.08 | 16.23 +VoxForge European | 31.21 | 20.47 +VoxForge Indian | 56.79 | 28.15 +Baidu Internal Testset  |   47.73 |   8.92 #### Benchmark Results for Mandarin Model (Character Error Rate) From b3ebf3fd620324b83859c66dd07f42c91a37ec07 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 9 Nov 2017 22:47:08 +0800 Subject: [PATCH 264/335] Support padding removing. --- data_utils/data.py | 37 ++++++++++++++++++++++++++++++++++--- infer.py | 8 +++++--- model_utils/model.py | 30 ++++++++++++++++++++++++++---- model_utils/network.py | 42 +++++++++++++++++++++++++++++++++++------- test.py | 8 +++++--- train.py | 6 ++++-- 6 files changed, 109 insertions(+), 22 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 70ee6fbad..1469beb08 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -59,6 +59,9 @@ class DataGenerator(object): be passed forward directly without converting to index sequence. :type keep_transcription_text: bool + :param num_conv_layers: The number of convolution layer, used to compute + the sequence length. + :type num_conv_layers: int """ def __init__(self, @@ -74,7 +77,8 @@ class DataGenerator(object): use_dB_normalization=True, num_threads=multiprocessing.cpu_count() // 2, random_seed=0, - keep_transcription_text=False): + keep_transcription_text=False, + num_conv_layers=2): self._max_duration = max_duration self._min_duration = min_duration self._normalizer = FeatureNormalizer(mean_std_filepath) @@ -95,6 +99,7 @@ class DataGenerator(object): self._local_data = local() self._local_data.tar2info = {} self._local_data.tar2object = {} + self._num_conv_layers = num_conv_layers def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. @@ -213,7 +218,15 @@ class DataGenerator(object): :return: Data feeding dict. 
:rtype: dict """ - return {"audio_spectrogram": 0, "transcript_text": 1} + feeding_dict = { + "audio_spectrogram": 0, + "transcript_text": 1, + "sequence_offset": 2, + "sequence_length": 3 + } + for i in xrange(self._num_conv_layers): + feeding_dict["conv%d_index_range" % i] = len(feeding_dict) + return feeding_dict @property def vocab_size(self): @@ -306,7 +319,25 @@ class DataGenerator(object): padded_audio[:, :audio.shape[1]] = audio if flatten: padded_audio = padded_audio.flatten() - new_batch.append((padded_audio, text)) + + padded_instance = [padded_audio, text] + padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1 + padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1 + valid_w = (audio.shape[1] - 1) // 3 + 1 + padded_instance += [ + [0], # sequence offset, always 0 + [valid_w], # valid sequence length + [1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w] + ] + pre_padded_h = padded_conv0_h + for i in xrange(self._num_conv_layers - 1): + padded_h = (pre_padded_h - 1) // 2 + 1 + pre_padded_h = padded_h + padded_instance += [ + [1, 32, 1, padded_h, valid_w + 1, padded_conv0_w] + ] + + new_batch.append(padded_instance) return new_batch def _batch_shuffle(self, manifest, batch_size, clipped=False): diff --git a/infer.py b/infer.py index 9ac3e632e..32d15f126 100644 --- a/infer.py +++ b/infer.py @@ -69,7 +69,8 @@ def infer(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1, - keep_transcription_text=True) + keep_transcription_text=True, + num_conv_layers=args.num_conv_layers) batch_reader = data_generator.batch_reader_creator( manifest_path=args.infer_manifest, batch_size=args.num_samples, @@ -100,10 +101,11 @@ def infer(): cutoff_top_n=args.cutoff_top_n, vocab_list=vocab_list, language_model_path=args.lang_model_path, - num_processes=args.num_proc_bsearch) + num_processes=args.num_proc_bsearch, + feeding_dict=data_generator.feeding) error_rate_func = cer if args.error_rate_type == 'cer' else wer - target_transcripts = [transcript for _, transcript in infer_data] + target_transcripts = [data[1] for data in infer_data] for target, result in zip(target_transcripts, result_transcripts): print("\nTarget Transcription: %s\nOutput Transcription: %s" % (target, result)) diff --git a/model_utils/model.py b/model_utils/model.py index 5a0d8890d..26aa1470a 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -165,7 +165,7 @@ class DeepSpeech2Model(object): def infer_batch(self, infer_data, decoding_method, beam_alpha, beam_beta, beam_size, cutoff_prob, cutoff_top_n, vocab_list, - language_model_path, num_processes): + language_model_path, num_processes, feeding_dict): """Model inference. Infer the transcription for a batch of speech utterances. @@ -195,6 +195,9 @@ class DeepSpeech2Model(object): :type language_model_path: basestring|None :param num_processes: Number of processes (CPU) for decoder. :type num_processes: int + :param feeding_dict: Feeding is a map of field name and tuple index + of the data that reader returns. + :type feeding_dict: dict|list :return: List of transcription texts. 
:rtype: List of basestring """ @@ -203,10 +206,13 @@ class DeepSpeech2Model(object): self._inferer = paddle.inference.Inference( output_layer=self._log_probs, parameters=self._parameters) # run inference - infer_results = self._inferer.infer(input=infer_data) - num_steps = len(infer_results) // len(infer_data) + infer_results = self._inferer.infer( + input=infer_data, feeding=feeding_dict) + start_pos = [0] * (len(infer_data) + 1) + for i in xrange(len(infer_data)): + start_pos[i + 1] = start_pos[i] + infer_data[i][3][0] probs_split = [ - infer_results[i * num_steps:(i + 1) * num_steps] + infer_results[start_pos[i]:start_pos[i + 1]] for i in xrange(0, len(infer_data)) ] # run decoder @@ -274,9 +280,25 @@ class DeepSpeech2Model(object): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(vocab_size)) + seq_offset_data = paddle.layer.data( + name='sequence_offset', + type=paddle.data_type.integer_value_sequence(1)) + seq_len_data = paddle.layer.data( + name='sequence_length', + type=paddle.data_type.integer_value_sequence(1)) + index_range_datas = [] + for i in xrange(num_rnn_layers): + index_range_datas.append( + paddle.layer.data( + name='conv%d_index_range' % i, + type=paddle.data_type.dense_vector(6))) + self._log_probs, self._loss = deep_speech_v2_network( audio_data=audio_data, text_data=text_data, + seq_offset_data=seq_offset_data, + seq_len_data=seq_len_data, + index_range_datas=index_range_datas, dict_size=vocab_size, num_conv_layers=num_conv_layers, num_rnn_layers=num_rnn_layers, diff --git a/model_utils/network.py b/model_utils/network.py index 13ba5d2c9..2053e906e 100644 --- a/model_utils/network.py +++ b/model_utils/network.py @@ -7,7 +7,7 @@ import paddle.v2 as paddle def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, - padding, act): + padding, act, index_range_data): """Convolution layer with batch normalization. :param input: Input layer. @@ -24,6 +24,8 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, :type padding: int|tuple|list :param act: Activation type. :type act: BaseActivation + :param index_range_data: Index range to indicate sub region. + :type index_range_data: LayerOutput :return: Batch norm layer after convolution layer. :rtype: LayerOutput """ @@ -36,7 +38,11 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride, padding=padding, act=paddle.activation.Linear(), bias_attr=False) - return paddle.layer.batch_norm(input=conv_layer, act=act) + batch_norm = paddle.layer.batch_norm(input=conv_layer, act=act) + # reset padding part to 0 + scale_sub_region = paddle.layer.scale_sub_region( + batch_norm, index_range_data, value=0.0) + return scale_sub_region def bidirectional_simple_rnn_bn_layer(name, input, size, act, share_weights): @@ -136,13 +142,15 @@ def bidirectional_gru_bn_layer(name, input, size, act): return paddle.layer.concat(input=[forward_gru, backward_gru]) -def conv_group(input, num_stacks): +def conv_group(input, num_stacks, index_range_datas): """Convolution group with stacked convolution layers. :param input: Input layer. :type input: LayerOutput :param num_stacks: Number of stacked convolution layers. :type num_stacks: int + :param index_range_datas: Index ranges for each convolution layer. + :type index_range_datas: tuple|list :return: Output layer of the convolution group. 
:rtype: LayerOutput """ @@ -153,7 +161,8 @@ def conv_group(input, num_stacks): num_channels_out=32, stride=(3, 2), padding=(5, 20), - act=paddle.activation.BRelu()) + act=paddle.activation.BRelu(), + index_range_data=index_range_datas[0]) for i in xrange(num_stacks - 1): conv = conv_bn_layer( input=conv, @@ -162,7 +171,8 @@ def conv_group(input, num_stacks): num_channels_out=32, stride=(1, 2), padding=(5, 10), - act=paddle.activation.BRelu()) + act=paddle.activation.BRelu(), + index_range_data=index_range_datas[i + 1]) output_num_channels = 32 output_height = 160 // pow(2, num_stacks) + 1 return conv, output_num_channels, output_height @@ -207,6 +217,9 @@ def rnn_group(input, size, num_stacks, use_gru, share_rnn_weights): def deep_speech_v2_network(audio_data, text_data, + seq_offset_data, + seq_len_data, + index_range_datas, dict_size, num_conv_layers=2, num_rnn_layers=3, @@ -219,6 +232,12 @@ def deep_speech_v2_network(audio_data, :type audio_data: LayerOutput :param text_data: Transcription text data layer. :type text_data: LayerOutput + :param seq_offset_data: Sequence offset data layer. + :type seq_offset_data: LayerOutput + :param seq_len_data: Valid sequence length data layer. + :type seq_len_data: LayerOutput + :param index_range_datas: Index ranges data layers. + :type index_range_datas: tuple|list :param dict_size: Dictionary size for tokenized transcription. :type dict_size: int :param num_conv_layers: Number of stacking convolution layers. @@ -239,7 +258,9 @@ def deep_speech_v2_network(audio_data, """ # convolution group conv_group_output, conv_group_num_channels, conv_group_height = conv_group( - input=audio_data, num_stacks=num_conv_layers) + input=audio_data, + num_stacks=num_conv_layers, + index_range_datas=index_range_datas) # convert data form convolution feature map to sequence of vectors conv2seq = paddle.layer.block_expand( input=conv_group_output, @@ -248,9 +269,16 @@ def deep_speech_v2_network(audio_data, stride_y=1, block_x=1, block_y=conv_group_height) + # remove padding part + remove_padding = paddle.layer.sub_seq( + input=conv2seq, + offsets=seq_offset_data, + sizes=seq_len_data, + act=paddle.activation.Linear(), + bias_attr=False) # rnn group rnn_group_output = rnn_group( - input=conv2seq, + input=remove_padding, size=rnn_size, num_stacks=num_rnn_layers, use_gru=use_gru, diff --git a/test.py b/test.py index 63fc4f65c..53f7e17af 100644 --- a/test.py +++ b/test.py @@ -70,7 +70,8 @@ def evaluate(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_proc_data, - keep_transcription_text=True) + keep_transcription_text=True, + num_conv_layers=args.num_conv_layers) batch_reader = data_generator.batch_reader_creator( manifest_path=args.test_manifest, batch_size=args.batch_size, @@ -103,8 +104,9 @@ def evaluate(): cutoff_top_n=args.cutoff_top_n, vocab_list=vocab_list, language_model_path=args.lang_model_path, - num_processes=args.num_proc_bsearch) - target_transcripts = [transcript for _, transcript in infer_data] + num_processes=args.num_proc_bsearch, + feeding_dict=data_generator.feeding) + target_transcripts = [data[1] for data in infer_data] for target, result in zip(target_transcripts, result_transcripts): error_sum += error_rate_func(target, result) num_ins += 1 diff --git a/train.py b/train.py index 16415713f..562fb4622 100644 --- a/train.py +++ b/train.py @@ -75,13 +75,15 @@ def train(): max_duration=args.max_duration, min_duration=args.min_duration, specgram_type=args.specgram_type, - num_threads=args.num_proc_data) + 
num_threads=args.num_proc_data, + num_conv_layers=args.num_conv_layers) dev_generator = DataGenerator( vocab_filepath=args.vocab_path, mean_std_filepath=args.mean_std_path, augmentation_config="{}", specgram_type=args.specgram_type, - num_threads=args.num_proc_data) + num_threads=args.num_proc_data, + num_conv_layers=args.num_conv_layers) train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest, batch_size=args.batch_size, From a02b8f8084b3e3534e1f7ffc662e120d50fdc8c7 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 9 Nov 2017 23:14:10 +0800 Subject: [PATCH 265/335] Add clean callback. --- data_utils/data.py | 8 +++++++- data_utils/utility.py | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 70ee6fbad..1c35f654c 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -11,6 +11,7 @@ import multiprocessing import numpy as np import paddle.v2 as paddle from threading import local +import atexit from data_utils.utility import read_manifest from data_utils.utility import xmap_readers_mp from data_utils.augmentor.augmentation import AugmentationPipeline @@ -274,13 +275,18 @@ class DataGenerator(object): for instance in manifest: yield instance - return xmap_readers_mp( + reader, cleanup_callback = xmap_readers_mp( lambda instance: self.process_utterance(instance["audio_filepath"], instance["text"]), reader, self._num_threads, 4096, order=True) + # register callback to main process + atexit.register(cleanup_callback) + + return reader + def _padding_batch(self, batch, padding_to=-1, flatten=False): """ Padding audio features with zeros to make them have the same shape (or diff --git a/data_utils/utility.py b/data_utils/utility.py index 49eed6d8d..bb5cad45b 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -138,6 +138,10 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): out_queue.put(sample) out_queue.put(end_flag) + def cleanup(): + # kill all sub process and threads + os._exit(0) + def xreader(): # prepare shared memory manager = Manager() @@ -174,4 +178,4 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): yield sample sample = flush_queue.get() - return xreader + return xreader, cleanup From f38d948193a1fb6ef967e2036e5c7cbceabaec16 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 10 Nov 2017 14:43:05 +0800 Subject: [PATCH 266/335] Add more comments. 
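To make the comments below easier to follow, here is a condensed sketch of how the extra feeding fields for padding removal are derived. It paraphrases the `data.py` logic from the padding-removal patch; the helper name is invented for illustration, and the conv strides are the hard-coded ones from the network (3 along time and 2 along frequency for conv0, then 1 and 2 for the remaining conv layers):

```python
# Sketch only: derives sequence_offset, sequence_length and the
# conv*_index_range fields consumed by sub_seq / scale_sub_region.
def padding_feed_fields(padded_shape, valid_width, num_conv_layers=2):
    freq_size, padded_width = padded_shape     # spectrogram is (freq, time)
    conv0_h = (freq_size - 1) // 2 + 1         # height after conv0 (stride 2)
    conv0_w = (padded_width - 1) // 3 + 1      # padded width after conv0 (stride 3)
    valid_w = (valid_width - 1) // 3 + 1       # valid (unpadded) width after conv0
    fields = [
        [0],        # sequence_offset: valid frames always start at 0
        [valid_w],  # sequence_length: number of frames kept by sub_seq
        # conv0_index_range: begin/end indices for channel, height and width;
        # the width range [valid_w + 1, conv0_w] is the padded tail that
        # scale_sub_region resets to zero.
        [1, 32, 1, conv0_h, valid_w + 1, conv0_w],
    ]
    height = conv0_h
    for _ in range(num_conv_layers - 1):
        height = (height - 1) // 2 + 1         # later convs keep the width, halve the height
        fields.append([1, 32, 1, height, valid_w + 1, conv0_w])
    return fields
```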
--- data_utils/data.py | 5 +++++ model_utils/network.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 1469beb08..d913e48a0 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -320,6 +320,9 @@ class DataGenerator(object): if flatten: padded_audio = padded_audio.flatten() + # Stride size for conv0 is (3, 2) + # Stride size for conv1 to convN is (1, 2) + # Same as the network, hard-coded here padded_instance = [padded_audio, text] padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1 padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1 @@ -327,6 +330,8 @@ class DataGenerator(object): padded_instance += [ [0], # sequence offset, always 0 [valid_w], # valid sequence length + # Index ranges for channel, height and width + # Please refer scale_sub_region layer to see details [1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w] ] pre_padded_h = padded_conv0_h diff --git a/model_utils/network.py b/model_utils/network.py index 2053e906e..7b4b8ab20 100644 --- a/model_utils/network.py +++ b/model_utils/network.py @@ -270,7 +270,7 @@ def deep_speech_v2_network(audio_data, block_x=1, block_y=conv_group_height) # remove padding part - remove_padding = paddle.layer.sub_seq( + remove_padding_data = paddle.layer.sub_seq( input=conv2seq, offsets=seq_offset_data, sizes=seq_len_data, @@ -278,7 +278,7 @@ def deep_speech_v2_network(audio_data, bias_attr=False) # rnn group rnn_group_output = rnn_group( - input=remove_padding, + input=remove_padding_data, size=rnn_size, num_stacks=num_rnn_layers, use_gru=use_gru, From bf69a1013b0dc16422635dd26ebaa492028dcf04 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 10 Nov 2017 16:31:59 +0800 Subject: [PATCH 267/335] Adapt tuning script to padding removing #444 --- tools/tune.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/tune.py b/tools/tune.py index 966029a82..47abf1413 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -88,7 +88,8 @@ def tune(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_proc_data, - keep_transcription_text=True) + keep_transcription_text=True, + num_conv_layers=args.num_conv_layers) audio_data = paddle.layer.data( name="audio_spectrogram", @@ -96,10 +97,25 @@ def tune(): text_data = paddle.layer.data( name="transcript_text", type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) + seq_offset_data = paddle.layer.data( + name='sequence_offset', + type=paddle.data_type.integer_value_sequence(1)) + seq_len_data = paddle.layer.data( + name='sequence_length', + type=paddle.data_type.integer_value_sequence(1)) + index_range_datas = [] + for i in xrange(args.num_rnn_layers): + index_range_datas.append( + paddle.layer.data( + name='conv%d_index_range' % i, + type=paddle.data_type.dense_vector(6))) output_probs, _ = deep_speech_v2_network( audio_data=audio_data, text_data=text_data, + seq_offset_data=seq_offset_data, + seq_len_data=seq_len_data, + index_range_datas=index_range_datas, dict_size=data_generator.vocab_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers, @@ -156,15 +172,17 @@ def tune(): for infer_data in batch_reader(): if (args.num_batches >= 0) and (cur_batch >= args.num_batches): break - infer_results = inferer.infer(input=infer_data) - - num_steps = len(infer_results) // len(infer_data) + infer_results = inferer.infer(input=infer_data, + feeding=data_generator.feeding) + start_pos = [0] * (len(infer_data) + 1) + for i 
in xrange(len(infer_data)): + start_pos[i + 1] = start_pos[i] + infer_data[i][3][0] probs_split = [ - infer_results[i * num_steps:(i + 1) * num_steps] - for i in xrange(len(infer_data)) + infer_results[start_pos[i]:start_pos[i + 1]] + for i in xrange(0, len(infer_data)) ] - target_transcripts = [transcript for _, transcript in infer_data] + target_transcripts = [ data[1] for data in infer_data ] num_ins += len(target_transcripts) # grid search From 22a882e30b30a5f219c5bf7b4dda558b154a428e Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 10 Nov 2017 16:48:32 +0800 Subject: [PATCH 268/335] Set the version of CTC decoders to 1.0 --- decoders/swig/setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/decoders/swig/setup.py b/decoders/swig/setup.py index 8af9ff304..b6bc0ca06 100644 --- a/decoders/swig/setup.py +++ b/decoders/swig/setup.py @@ -70,7 +70,6 @@ FILES = glob.glob('kenlm/util/*.cc') \ FILES += glob.glob('openfst-1.6.3/src/lib/*.cc') -# FILES + glob.glob('glog/src/*.cc') FILES = [ fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith( @@ -107,7 +106,6 @@ decoders_module = [ 'kenlm', 'openfst-1.6.3/src/include', 'ThreadPool', - #'glog/src' ], libraries=LIBS, extra_compile_args=ARGS) @@ -115,7 +113,7 @@ decoders_module = [ setup( name='swig_decoders', - version='0.1', + version='1.0', description="""CTC decoders""", ext_modules=decoders_module, py_modules=['swig_decoders'], ) From edaed68f33b7fff2455c26b880340ba680621f5c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 12 Nov 2017 16:47:28 +0800 Subject: [PATCH 269/335] Involve dev dataset to build vocabulary in the aishell example --- examples/aishell/run_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/aishell/run_data.sh b/examples/aishell/run_data.sh index 8bacf6d80..eb0388d84 100644 --- a/examples/aishell/run_data.sh +++ b/examples/aishell/run_data.sh @@ -17,7 +17,7 @@ fi python tools/build_vocab.py \ --count_threshold=0 \ --vocab_path='data/aishell/vocab.txt' \ ---manifest_paths='data/aishell/manifest.train' +--manifest_paths 'data/aishell/manifest.train' 'data/aishell/manifest.dev' if [ $? -ne 0 ]; then echo "Build vocabulary failed. Terminated." From ecbed5565d8bde20845e96489ef6a511f065a447 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 14 Nov 2017 06:55:35 -0600 Subject: [PATCH 270/335] Create License Add Apache License. --- LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
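The tuning-script change in PATCH 267 above works because, once padding is removed, each utterance contributes a different number of time steps to the flattened batch output, so per-utterance probabilities must be recovered with cumulative offsets rather than a fixed `num_steps` stride. A minimal NumPy sketch of that splitting step (shapes and names here are illustrative, not the exact objects used in `tools/tune.py`):

```python
import numpy as np

def split_batch_probs(flat_probs, seq_lens):
    """Split a flattened batch of per-step probabilities back into utterances.

    flat_probs: all time steps of the whole batch stacked along axis 0
    seq_lens: valid (unpadded) output length of each utterance, i.e. the
              role played by infer_data[i][3][0] in the tuning script
    """
    start_pos = np.cumsum([0] + list(seq_lens))
    return [flat_probs[start_pos[i]:start_pos[i + 1]]
            for i in range(len(seq_lens))]

# two utterances with 3 and 4 output steps over a 29-token vocabulary
probs = np.random.rand(7, 29)
print([p.shape for p in split_batch_probs(probs, [3, 4])])  # [(3, 29), (4, 29)]
```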
From 12a1ce2570c3c63b61f4454c183ce8e9c8c7d333 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 14 Nov 2017 21:34:47 +0800 Subject: [PATCH 271/335] Add ci and code format checking. --- .clang-format | 29 +++++++++++++++++++++++++++ .clang_format.hook | 15 ++++++++++++++ .gitignore | 2 ++ .pre-commit-config.yaml | 43 +++++++++++++++++++++++++++++++++++++++++ .style.yapf | 3 +++ .travis.yml | 37 +++++++++++++++++++++++++++++++++++ .travis/precommit.sh | 21 ++++++++++++++++++++ .travis/unittest.sh | 36 ++++++++++++++++++++++++++++++++++ 8 files changed, 186 insertions(+) create mode 100644 .clang-format create mode 100755 .clang_format.hook create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .style.yapf create mode 100644 .travis.yml create mode 100755 .travis/precommit.sh create mode 100755 .travis/unittest.sh diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..30863c27a --- /dev/null +++ b/.clang-format @@ -0,0 +1,29 @@ +# This file is used by clang-format to autoformat paddle source code +# +# The clang-format is part of llvm toolchain. +# It need to install llvm and clang to format source code style. +# +# The basic usage is, +# clang-format -i -style=file PATH/TO/SOURCE/CODE +# +# The -style=file implicit use ".clang-format" file located in one of +# parent directory. +# The -i means inplace change. +# +# The document of clang-format is +# http://clang.llvm.org/docs/ClangFormat.html +# http://clang.llvm.org/docs/ClangFormatStyleOptions.html +--- +Language: Cpp +BasedOnStyle: Google +IndentWidth: 2 +TabWidth: 2 +ContinuationIndentWidth: 4 +MaxEmptyLinesToKeep: 2 +AccessModifierOffset: -2 # The private/protected/public has no indent in class +Standard: Cpp11 +AllowAllParametersOfDeclarationOnNextLine: true +BinPackParameters: false +BinPackArguments: false +... + diff --git a/.clang_format.hook b/.clang_format.hook new file mode 100755 index 000000000..40d70f56c --- /dev/null +++ b/.clang_format.hook @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +readonly VERSION="3.8" + +version=$(clang-format -version) + +if ! [[ $version == *"$VERSION"* ]]; then + echo "clang-format version check failed." 
+ echo "a version contains '$VERSION' is needed, but get '$version'" + echo "you can install the right version, and make an soft-link to '\$PATH' env" + exit -1 +fi + +clang-format $@ diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..dde3895fc --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +*.pyc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..8ff36e098 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +- repo: https://github.com/pre-commit/mirrors-yapf.git + sha: v0.16.0 + hooks: + - id: yapf + files: \.py$ +- repo: https://github.com/pre-commit/pre-commit-hooks + sha: a11d9314b22d8f8c7556443875b731ef05965464 + hooks: + - id: check-merge-conflict + - id: check-symlinks + - id: detect-private-key + files: (?!.*paddle)^.*$ + - id: end-of-file-fixer + files: \.md$ + - id: trailing-whitespace + files: \.md$ +- repo: https://github.com/Lucas-C/pre-commit-hooks + sha: v1.0.1 + hooks: + - id: forbid-crlf + files: \.md$ + - id: remove-crlf + files: \.md$ + - id: forbid-tabs + files: \.md$ + - id: remove-tabs + files: \.md$ +- repo: local + hooks: + - id: clang-format + name: clang-format + description: Format files with ClangFormat + entry: bash .clang_format.hook -i + language: system + files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ +- repo: local + hooks: + - id: convert-markdown-into-html + name: convert-markdown-into-html + description: Convert README.md into index.html + entry: python .pre-commit-hooks/convert_markdown_into_html.py + language: system + files: .+README\.md$ diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 000000000..4741fb4f3 --- /dev/null +++ b/.style.yapf @@ -0,0 +1,3 @@ +[style] +based_on_style = pep8 +column_limit = 80 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..0f67f656f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,37 @@ +language: cpp +cache: ccache +sudo: required +dist: trusty +services: + - docker +os: + - linux +env: + - JOB=PRE_COMMIT + +addons: + apt: + packages: + - git + - python + - python-pip + - python2.7-dev + ssh_known_hosts: 52.76.173.135 +before_install: + - sudo pip install -U virtualenv pre-commit pip + - docker pull paddlepaddle/paddle:latest +script: + - .travis/precommit.sh + - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c + 'cd /py_unittest; sh .travis/unittest.sh' + - | + if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; + if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit 0; fi; + export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh + export MODELS_DIR=`pwd` + cd .. + curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $MODELS_DIR +notifications: + email: + on_success: change + on_failure: always diff --git a/.travis/precommit.sh b/.travis/precommit.sh new file mode 100755 index 000000000..bcbfb2bb5 --- /dev/null +++ b/.travis/precommit.sh @@ -0,0 +1,21 @@ +#!/bin/bash +function abort(){ + echo "Your commit not fit PaddlePaddle code style" 1>&2 + echo "Please use pre-commit scripts to auto-format your code" 1>&2 + exit 1 +} + +trap 'abort' 0 +set -e +cd `dirname $0` +cd .. +export PATH=/usr/bin:$PATH +pre-commit install + +if ! 
pre-commit run -a ; then + ls -lh + git diff --exit-code + exit 1 +fi + +trap : 0 diff --git a/.travis/unittest.sh b/.travis/unittest.sh new file mode 100755 index 000000000..4195a441e --- /dev/null +++ b/.travis/unittest.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +abort(){ + echo "Run unittest failed" 1>&2 + echo "Please check your code" 1>&2 + exit 1 +} + +unittest(){ + cd $1 > /dev/null + if [ -f "setup.sh" ]; then + sh setup.sh + export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + fi + if [ $? != 0 ]; then + exit 1 + fi + find . -name 'tests' -type d -print0 | \ + xargs -0 -I{} -n1 bash -c \ + 'python -m unittest discover -v -s {}' + cd - > /dev/null +} + +trap 'abort' 0 +set -e + +for proj in */ ; do + if [ -d $proj ]; then + unittest $proj + if [ $? != 0 ]; then + exit 1 + fi + fi +done + +trap : 0 From 1e3875160cf50998d0ff6d6a5f63304b4994a42a Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 15 Nov 2017 11:39:37 +0800 Subject: [PATCH 272/335] Add url for BaiduEng8k model. --- README.md | 2 +- models/baidu_en8k/download_model.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 543af0ad1..6c473b699 100644 --- a/README.md +++ b/README.md @@ -480,7 +480,7 @@ python deploy/demo_client.py --help Language | Model Name | Training Data | Hours of Speech :-----------: | :------------: | :----------: | -------: English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [BaiduEN8k Model](to-be-added) | Baidu Internal English Dataset | 8628 h +English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h Mandarin | [BaiduCN1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h diff --git a/models/baidu_en8k/download_model.sh b/models/baidu_en8k/download_model.sh index 7d92fd52e..e6b26a3e8 100644 --- a/models/baidu_en8k/download_model.sh +++ b/models/baidu_en8k/download_model.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL='To-be-added' +URL='http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90' MD5=a19d40cb3b558eb696c44d883f32cfda TARGET=./baidu_en8k_model.tar.gz From cc5e4203317c7870d0743c8bc1d9497b05866311 Mon Sep 17 00:00:00 2001 From: Hu Weiwei Date: Wed, 15 Nov 2017 02:43:54 -0600 Subject: [PATCH 273/335] fix typo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6c473b699..ca1469266 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ Six optional augmentation components are provided to be selected, configured and - Noise Perturbation (need background noise audio files) - Impulse Response (need impulse audio files) -In order to inform the trainer of what augmentation components are needed and what their processing orders are, it is required to prepare in advance a *augmentation configuration file* in [JSON](http://www.json.org/) format. For example: +In order to inform the trainer of what augmentation components are needed and what their processing orders are, it is required to prepare in advance an *augmentation configuration file* in [JSON](http://www.json.org/) format. 
For example: ``` [{ @@ -226,7 +226,7 @@ If you wish to train your own better language model, please refer to [KenLM](htt #### English LM -The English corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our English languge model. There are some preprocessing steps before training: +The English corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our English language model. There are some preprocessing steps before training: * Characters not in \[A-Za-z0-9\s'\] (\s represents whitespace characters) are removed and Arabic numbers are converted to English numbers like 1000 to one thousand. * Repeated whitespace characters are squeezed to one and the beginning whitespace characters are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercase. From 528895cf2618745667a108f15aa7c6f8a0b6e84d Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 15 Nov 2017 18:11:10 +0800 Subject: [PATCH 274/335] fix the model path in the tiny example --- examples/tiny/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index a58f5d100..d33863222 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -32,7 +32,7 @@ python -u test.py \ --test_manifest='data/tiny/manifest.tiny' \ --mean_std_path='data/tiny/mean_std.npz' \ --vocab_path='data/tiny/vocab.txt' \ ---model_path='checkpoints/params.pass-19.tar.gz' \ +--model_path='checkpoints/tiny/params.pass-19.tar.gz' \ --lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ --decoding_method='ctc_beam_search' \ --error_rate_type='wer' \ From 0923f3a520de0012181b37edc50ced5a6dda1634 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 15 Nov 2017 18:26:18 +0800 Subject: [PATCH 275/335] fix doc for Docker --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ca1469266..2552bad21 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ ## Installation -To avoid the trouble of environment setup, [running in docker container](#running-in-docker-container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. +To avoid the trouble of environment setup, [running in Docker container](#running-in-docker-container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. ### Prerequisites - Python 2.7 only supported @@ -344,7 +344,7 @@ Take several steps to launch the Docker image: - Download the Docker image ```bash -nvidia-docker pull paddlepaddle/models:deep-speech-2 +sudo nvidia-docker pull paddlepaddle/models:deep-speech-2 ``` - Clone this repository From 9800d1495690d2d85305be750317e8cd623222b4 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 16 Nov 2017 11:49:43 +0800 Subject: [PATCH 276/335] fix the data path in the librispeech example --- examples/librispeech/run_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh index 12f2dc6d5..6e170c12a 100644 --- a/examples/librispeech/run_data.sh +++ b/examples/librispeech/run_data.sh @@ -5,7 +5,7 @@ cd ../.. 
> /dev/null # download data, generate manifests PYTHONPATH=.:$PYTHONPATH python data/librispeech/librispeech.py \ --manifest_prefix='data/librispeech/manifest' \ ---target_dir='~/.cache/paddle/dataset/speech/Libri' \ +--target_dir='~/.cache/paddle/dataset/speech/libri' \ --full_download='True' if [ $? -ne 0 ]; then From 24cb1866a8227d76d8d342d7886f6652ad1ed697 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 16 Nov 2017 16:29:27 +0800 Subject: [PATCH 277/335] Adapt settings for Aishell example. --- examples/aishell/run_infer.sh | 6 +++--- examples/aishell/run_infer_golden.sh | 6 +++--- examples/aishell/run_test.sh | 6 +++--- examples/aishell/run_test_golden.sh | 6 +++--- examples/aishell/run_train.sh | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/aishell/run_infer.sh b/examples/aishell/run_infer.sh index 404555e8b..e8bd9eab1 100644 --- a/examples/aishell/run_infer.sh +++ b/examples/aishell/run_infer.sh @@ -21,11 +21,11 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ ---alpha=1.4 \ ---beta=2.4 \ +--alpha=2.6 \ +--beta=5.0 \ --cutoff_prob=0.99 \ --cutoff_top_n=40 \ ---use_gru=False \ +--use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ --infer_manifest='data/aishell/manifest.test' \ diff --git a/examples/aishell/run_infer_golden.sh b/examples/aishell/run_infer_golden.sh index 4701bdaac..68f5a521a 100644 --- a/examples/aishell/run_infer_golden.sh +++ b/examples/aishell/run_infer_golden.sh @@ -30,11 +30,11 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ ---alpha=1.4 \ ---beta=2.4 \ +--alpha=2.6 \ +--beta=5.0 \ --cutoff_prob=0.99 \ --cutoff_top_n=40 \ ---use_gru=False \ +--use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ --infer_manifest='data/aishell/manifest.test' \ diff --git a/examples/aishell/run_test.sh b/examples/aishell/run_test.sh index feec95cb4..35dfca82f 100644 --- a/examples/aishell/run_test.sh +++ b/examples/aishell/run_test.sh @@ -22,11 +22,11 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ ---alpha=1.4 \ ---beta=2.4 \ +--alpha=2.6 \ +--beta=5.0 \ --cutoff_prob=0.99 \ --cutoff_top_n=40 \ ---use_gru=False \ +--use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ --test_manifest='data/aishell/manifest.test' \ diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/run_test_golden.sh index 387d54f39..8b5e65595 100644 --- a/examples/aishell/run_test_golden.sh +++ b/examples/aishell/run_test_golden.sh @@ -31,11 +31,11 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=1024 \ ---alpha=1.4 \ ---beta=2.4 \ +--alpha=2.6 \ +--beta=5.0 \ --cutoff_prob=0.99 \ --cutoff_top_n=40 \ ---use_gru=False \ +--use_gru=True \ --use_gpu=True \ --share_rnn_weights=False \ --test_manifest='data/aishell/manifest.test' \ diff --git a/examples/aishell/run_train.sh b/examples/aishell/run_train.sh index 077fabcd8..e09205cb4 100644 --- a/examples/aishell/run_train.sh +++ b/examples/aishell/run_train.sh @@ -19,7 +19,7 @@ python -u train.py \ --min_duration=0.0 \ --test_off=False \ --use_sortagrad=True \ ---use_gru=False \ +--use_gru=True \ --use_gpu=True \ --is_local=True \ --share_rnn_weights=False \ From 35ef4624b0a5e6b657b62980a44a502b868e6cbf Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 16 Nov 2017 20:46:02 +0800 Subject: [PATCH 278/335] Update url for Aishell model. 
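The Aishell example scripts above (PATCH 277) switch the acoustic model to GRUs and retune the decoder weights to `--alpha=2.6` and `--beta=5.0`. Roughly, `alpha` weights the external language model and `beta` rewards word insertions during CTC beam search; the sketch below shows how such a pair of weights is typically combined into a candidate score (illustrative code, not the project's scorer API):

```python
import math

def candidate_score(ctc_log_prob, lm_prob, word_count, alpha=2.6, beta=5.0):
    # log P_ctc(chars | audio) + alpha * log P_lm(words) + beta * |words|
    return ctc_log_prob + alpha * math.log(lm_prob) + beta * word_count

# A larger alpha trusts the language model more, while beta offsets the
# LM's bias towards short transcriptions; tools/tune.py grid-searches both.
print(candidate_score(-12.3, 1e-4, 5))
```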
--- README.md | 2 +- models/aishell/download_model.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ca1469266..2e74b575c 100644 --- a/README.md +++ b/README.md @@ -481,7 +481,7 @@ Language | Model Name | Training Data | Hours of Speech :-----------: | :------------: | :----------: | -------: English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h -Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h Mandarin | [BaiduCN1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h #### Language Model Released diff --git a/models/aishell/download_model.sh b/models/aishell/download_model.sh index 19aec554e..072fc6fa4 100644 --- a/models/aishell/download_model.sh +++ b/models/aishell/download_model.sh @@ -2,8 +2,8 @@ . ../../utils/utility.sh -URL='http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274' -MD5=28521a58552885a81cf92a1e9b133a71 +URL='http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973' +MD5=0ee83aa15fba421e5de8fc66c8feb350 TARGET=./aishell_model.tar.gz From abbfa43b22d19b990df9a239fee5a4fbdd06b996 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 16 Nov 2017 23:04:35 +0800 Subject: [PATCH 279/335] Add script for VoxForge data preparation. --- README.md | 2 + data/voxforge/run_data.sh | 18 ++++ data/voxforge/voxforge.py | 221 ++++++++++++++++++++++++++++++++++++++ data_utils/utility.py | 19 ++++ 4 files changed, 260 insertions(+) create mode 100644 data/voxforge/run_data.sh create mode 100644 data/voxforge/voxforge.py diff --git a/README.md b/README.md index ca1469266..6f282a289 100644 --- a/README.md +++ b/README.md @@ -506,6 +506,8 @@ VoxForge European | 31.21 | 20.47 VoxForge Indian | 56.79 | 28.15 Baidu Internal Testset  |   47.73 |   8.92 +For reproducing results on VoxForge data, we provide a script to generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updated and the generated manifest files may have difference from those we evaluated. + #### Benchmark Results for Mandarin Model (Character Error Rate) Test Set | Aishell Model | BaiduCN1.2k Model diff --git a/data/voxforge/run_data.sh b/data/voxforge/run_data.sh new file mode 100644 index 000000000..e0a9f1b3f --- /dev/null +++ b/data/voxforge/run_data.sh @@ -0,0 +1,18 @@ +#! /usr/bin/env bash + +cd ../.. > /dev/null + +# download data, generate manifests +PYTHONPATH=.:$PYTHONPATH python data/voxforge/voxforge.py \ +--manifest_prefix='data/voxforge/manifest' \ +--target_dir='~/.cache/paddle/dataset/speech/VoxForge' \ +--is_merge_dialect=True \ +--dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian' + +if [ $? -ne 0 ]; then + echo "Prepare VoxForge failed. Terminated." + exit 1 +fi + +echo "VoxForge Data preparation done." 
+exit 0 diff --git a/data/voxforge/voxforge.py b/data/voxforge/voxforge.py new file mode 100644 index 000000000..63f052bd7 --- /dev/null +++ b/data/voxforge/voxforge.py @@ -0,0 +1,221 @@ +"""Prepare VoxForge dataset + +Download, unpack and create manifest files. +Manifest file is a json-format file with each line containing the +meta data (i.e. audio filepath, transcript and audio duration) +of each audio file in the data set. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import codecs +import soundfile +import json +import argparse +import shutil +import subprocess +from data_utils.utility import download_multi, unpack, getfile_insensitive + +DATA_HOME = '~/.cache/paddle/dataset/speech' + +DATA_URL = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/' \ + 'Audio/Main/16kHz_16bit' + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--target_dir", + default=DATA_HOME + "/VoxForge", + type=str, + help="Directory to save the dataset. (default: %(default)s)") +parser.add_argument( + "--dialects", + default=[ + 'american', 'british', 'australian', 'european', 'irish', 'canadian', + 'indian' + ], + nargs='+', + type=str, + help="Dialect types. (default: %(default)s)") +parser.add_argument( + "--is_merge_dialect", + default=True, + type=bool, + help="If set True, manifests of american dialect and canadian dialect will " + "be merged to american-canadian dialect; manifests of british " + "dialect, irish dialect and australian dialect will be merged to " + "commonwealth dialect. (default: %(default)s)") +parser.add_argument( + "--manifest_prefix", + default="manifest", + type=str, + help="Filepath prefix for output manifests. (default: %(default)s)") +args = parser.parse_args() + + +def download_and_unpack(target_dir, url): + wget_args = '-q -l 1 -N -nd -c -e robots=off -A tgz -r -np' + tgz_dir = os.path.join(target_dir, 'tgz') + exit_code = download_multi(url, tgz_dir, wget_args) + if exit_code != 0: + print('Download tgz audio files failed with exit code %d.' 
% exit_code) + else: + print('Download done, start unpacking ...') + audio_dir = os.path.join(target_dir, 'audio') + for root, dirs, files in os.walk(tgz_dir): + for file in files: + print(file) + if file.endswith('.tgz'): + unpack(os.path.join(root, file), audio_dir) + + +def select_dialects(target_dir, dialect_list): + """Classify audio files by dialect.""" + dialect_root_dir = os.path.join(target_dir, 'dialect') + if os.path.exists(dialect_root_dir): + shutil.rmtree(dialect_root_dir) + os.mkdir(dialect_root_dir) + audio_dir = os.path.abspath(os.path.join(target_dir, 'audio')) + for dialect in dialect_list: + # filter files by dialect + command = 'find %s -iwholename "*etc/readme*" -exec egrep -iHl \ + "pronunciation dialect.*%s" {} \;' % (audio_dir, dialect) + p = subprocess.Popen( + command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) + output, err = p.communicate() + dialect_dir = os.path.join(dialect_root_dir, dialect) + if os.path.exists(dialect_dir): + shutil.rmtree(dialect_dir) + os.mkdir(dialect_dir) + for path in output.splitlines(): + src_dir = os.path.dirname(os.path.dirname(path)) + link = os.path.basename(os.path.normpath(src_dir)) + os.symlink(src_dir, os.path.join(dialect_dir, link)) + + +def generate_manifest(data_dir, manifest_path): + json_lines = [] + + for path in os.listdir(data_dir): + audio_link = os.path.join(data_dir, path) + assert os.path.islink( + audio_link), '%s should be symbolic link.' % audio_link + actual_audio_dir = os.path.abspath(os.readlink(audio_link)) + + audio_type = '' + if os.path.isdir(os.path.join(actual_audio_dir, 'wav')): + audio_type = 'wav' + elif os.path.isdir(os.path.join(actual_audio_dir, 'flac')): + audio_type = 'flac' + else: + print('Unknown audio type, skipped processing %s.' % + actual_audio_dir) + continue + + etc_dir = os.path.join(actual_audio_dir, 'etc') + prompts_file = os.path.join(etc_dir, 'PROMPTS') + if not os.path.isfile(prompts_file): + print('PROMPTS file missing, skip processing %s.' % + actual_audio_dir) + continue + + readme_file = getfile_insensitive(os.path.join(etc_dir, 'README')) + if readme_file is None: + print('README file missing, skip processing %s.' % actual_audio_dir) + continue + + for line in file(prompts_file): + u, trans = line.strip().split(None, 1) + u_parts = u.split('/') + + # try to format the date time + try: + speaker, date, sfx = u_parts[-3].split('-') + obj = datetime.datetime.strptime(date, '%y.%m.%d') + formatted = obj.strftime('%Y%m%d') + u_parts[-3] = '-'.join([speaker, formatted, sfx]) + except Exception as e: + pass + + if len(u_parts) < 2: + u_parts = [audio_type] + u_parts + u_parts[-2] = audio_type + u_parts[-1] += '.' + audio_type + u = os.path.join(actual_audio_dir, '/'.join(u_parts[-2:])) + + if not os.path.isfile(u): + print('Audio file missing, skip processing %s.' % u) + continue + + if os.stat(u).st_size == 0: + print('Empty audio file, skip processing %s.' % u) + continue + + trans = trans.strip().replace('-', ' ') + if not trans.isupper() or \ + not trans.strip().replace(' ', '').replace("'", "").isalpha(): + print("Transcript not normalized properly, skip processing %s." 
+ % u) + continue + + audio_data, samplerate = soundfile.read(u) + duration = float(len(audio_data)) / samplerate + json_lines.append( + json.dumps({ + 'audio_filepath': u, + 'duration': duration, + 'text': trans.lower() + })) + + with codecs.open(manifest_path, 'w', 'utf-8') as fout: + for line in json_lines: + fout.write(line + '\n') + + +def merge_manifests(manifest_files, save_path): + lines = [] + for manifest_file in manifest_files: + line = codecs.open(manifest_file, 'r', 'utf-8').readlines() + lines += line + + with codecs.open(save_path, 'w', 'utf-8') as fout: + for line in lines: + fout.write(line) + + +def prepare_dataset(url, dialects, target_dir, manifest_prefix, is_merge): + download_and_unpack(target_dir, url) + select_dialects(target_dir, dialects) + american_canadian_manifests = [] + commonwealth_manifests = [] + for dialect in dialects: + dialect_dir = os.path.join(target_dir, 'dialect', dialect) + manifest_fpath = manifest_prefix + '.' + dialect + if dialect == 'american' or dialect == 'canadian': + american_canadian_manifests.append(manifest_fpath) + if dialect == 'australian' \ + or dialect == 'british' \ + or dialect == 'irish': + commonwealth_manifests.append(manifest_fpath) + generate_manifest(dialect_dir, manifest_fpath) + + if is_merge: + if len(american_canadian_manifests) > 0: + manifest_fpath = manifest_prefix + '.american-canadian' + merge_manifests(american_canadian_manifests, manifest_fpath) + if len(commonwealth_manifests) > 0: + manifest_fpath = manifest_prefix + '.commonwealth' + merge_manifests(commonwealth_manifests, manifest_fpath) + + +def main(): + if args.target_dir.startswith('~'): + args.target_dir = os.path.expanduser(args.target_dir) + + prepare_dataset(DATA_URL, args.dialects, args.target_dir, + args.manifest_prefix, args.is_merge_dialect) + + +if __name__ == '__main__': + main() diff --git a/data_utils/utility.py b/data_utils/utility.py index bb5cad45b..2633e1b42 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -42,6 +42,25 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): return manifest +def getfile_insensitive(path): + """Get the actual file path when given insensitive filename.""" + directory, filename = os.path.split(path) + directory, filename = (directory or '.'), filename.lower() + for f in os.listdir(directory): + newpath = os.path.join(directory, f) + if os.path.isfile(newpath) and f.lower() == filename: + return newpath + + +def download_multi(url, target_dir, extra_args): + """Download multiple files from url to target_dir.""" + if not os.path.exists(target_dir): os.makedirs(target_dir) + print("Downloading %s ..." % url) + ret_code = os.system("wget -c " + url + ' ' + extra_args + " -P " + + target_dir) + return ret_code + + def download(url, md5sum, target_dir): """Download file from url to target_dir, and check md5sum.""" if not os.path.exists(target_dir): os.makedirs(target_dir) From b5f70d5fcf9590797f8f0bb732bca5e9d6eefbeb Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 17 Nov 2017 14:54:18 +0800 Subject: [PATCH 280/335] Refine doc. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f282a289..7ceae17f2 100644 --- a/README.md +++ b/README.md @@ -506,7 +506,7 @@ VoxForge European | 31.21 | 20.47 VoxForge Indian | 56.79 | 28.15 Baidu Internal Testset  |   47.73 |   8.92 -For reproducing results on VoxForge data, we provide a script to generate VoxForge dialect manifest files. 
Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updated and the generated manifest files may have difference from those we evaluated. +For reproducing benchmark results on VoxForge data, we provide a script to generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updated and the generated manifest files may have difference from those we evaluated. #### Benchmark Results for Mandarin Model (Character Error Rate) From 0dc4dddf2fb678c36624c57be10b72b1e0982116 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 17 Nov 2017 15:18:35 +0800 Subject: [PATCH 281/335] Some fix for CI. --- .clang_format.hook | 2 +- .pre-commit-config.yaml | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.clang_format.hook b/.clang_format.hook index 40d70f56c..4cbc972bb 100755 --- a/.clang_format.hook +++ b/.clang_format.hook @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -readonly VERSION="3.8" +readonly VERSION="3.9" version=$(clang-format -version) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ff36e098..ede1c53a4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,11 +33,3 @@ entry: bash .clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ -- repo: local - hooks: - - id: convert-markdown-into-html - name: convert-markdown-into-html - description: Convert README.md into index.html - entry: python .pre-commit-hooks/convert_markdown_into_html.py - language: system - files: .+README\.md$ From 9f0c3467e57057b9fa9cf668345243da058fa1b7 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 17 Nov 2017 15:50:10 +0800 Subject: [PATCH 282/335] fix decoders: force indices in FST starting from one & add version check in setup --- .clang_format.hook | 2 +- decoders/swig/path_trie.cpp | 2 +- decoders/swig/scorer.cpp | 12 ++---------- decoders/swig/scorer.h | 2 +- decoders/swig/setup.py | 2 +- setup.sh | 2 +- 6 files changed, 7 insertions(+), 15 deletions(-) diff --git a/.clang_format.hook b/.clang_format.hook index 40d70f56c..8141fffb3 100755 --- a/.clang_format.hook +++ b/.clang_format.hook @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -readonly VERSION="3.8" +readonly VERSION="3.6" version=$(clang-format -version) diff --git a/decoders/swig/path_trie.cpp b/decoders/swig/path_trie.cpp index 40d909705..152efa82c 100644 --- a/decoders/swig/path_trie.cpp +++ b/decoders/swig/path_trie.cpp @@ -52,7 +52,7 @@ PathTrie* PathTrie::get_path_trie(int new_char, bool reset) { } else { if (has_dictionary_) { matcher_->SetState(dictionary_state_); - bool found = matcher_->Find(new_char); + bool found = matcher_->Find(new_char + 1); if (!found) { // Adding this character causes word outside dictionary auto FSTZERO = fst::TropicalWeight::Zero(); diff --git a/decoders/swig/scorer.cpp b/decoders/swig/scorer.cpp index 686c67c77..39da13d16 100644 --- a/decoders/swig/scorer.cpp +++ b/decoders/swig/scorer.cpp @@ -152,10 +152,8 @@ void Scorer::set_char_map(const std::vector& char_list) { for (size_t i = 0; i < char_list_.size(); i++) { if (char_list_[i] == " ") { SPACE_ID_ = i; - char_map_[' '] = i; - } else if (char_list_[i].size() == 1) { - char_map_[char_list_[i][0]] = i; } + char_map_[char_list_[i]] = i + 1; // Force index starting from zero } } @@ -193,17 +191,11 @@ std::vector Scorer::make_ngram(PathTrie* prefix) { void 
Scorer::fill_dictionary(bool add_space) { fst::StdVectorFst dictionary; - // First reverse char_list so ints can be accessed by chars - std::unordered_map char_map; - for (size_t i = 0; i < char_list_.size(); i++) { - char_map[char_list_[i]] = i; - } - // For each unigram convert to ints and put in trie int dict_size = 0; for (const auto& word : vocabulary_) { bool added = add_word_to_dictionary( - word, char_map, add_space, SPACE_ID_, &dictionary); + word, char_map_, add_space, SPACE_ID_ + 1, &dictionary); dict_size += added ? 1 : 0; } diff --git a/decoders/swig/scorer.h b/decoders/swig/scorer.h index 618364635..5ebc719c7 100644 --- a/decoders/swig/scorer.h +++ b/decoders/swig/scorer.h @@ -104,7 +104,7 @@ private: int SPACE_ID_; std::vector char_list_; - std::unordered_map char_map_; + std::unordered_map char_map_; std::vector vocabulary_; }; diff --git a/decoders/swig/setup.py b/decoders/swig/setup.py index b6bc0ca06..a4bb2e9da 100644 --- a/decoders/swig/setup.py +++ b/decoders/swig/setup.py @@ -113,7 +113,7 @@ decoders_module = [ setup( name='swig_decoders', - version='1.0', + version='1.1', description="""CTC decoders""", ext_modules=decoders_module, py_modules=['swig_decoders'], ) diff --git a/setup.sh b/setup.sh index 7c40415db..ec5e47ec8 100644 --- a/setup.sh +++ b/setup.sh @@ -27,7 +27,7 @@ if [ $? != 0 ]; then fi # install decoders -python -c "import swig_decoders" +python -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")" if [ $? != 0 ]; then cd decoders/swig > /dev/null sh setup.sh From 2587ebf2f7c790195719a0fb659acec68f780e5b Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 17 Nov 2017 15:50:55 +0800 Subject: [PATCH 283/335] revert clang_format version --- .clang_format.hook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang_format.hook b/.clang_format.hook index 8141fffb3..40d70f56c 100755 --- a/.clang_format.hook +++ b/.clang_format.hook @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -readonly VERSION="3.6" +readonly VERSION="3.8" version=$(clang-format -version) From 980b8289a350e6f93fb7b4e779461dd525e8f399 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 17 Nov 2017 15:27:53 +0800 Subject: [PATCH 284/335] Update travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0f67f656f..eadcb03b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,7 +23,7 @@ before_install: script: - .travis/precommit.sh - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c - 'cd /py_unittest; sh .travis/unittest.sh' + 'cd /py_unittest; sh .travis/unittest.sh' || exit $? - | if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit 0; fi; From 48619f39e7f8e7f96820b6ff96d8e84b1888550c Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 17 Nov 2017 17:18:13 +0800 Subject: [PATCH 285/335] Fix travis. 
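The decoder fix in PATCH 282 above shifts every character id used inside the dictionary FST by one, because state 0 is already taken by the FST's start state; mapping the first vocabulary entry to 0 would collide with it and silently corrupt the spelling-correction lookups. A small Python rendering of the same convention (the character list here is made up):

```python
char_list = ["'", ' ', 'a', 'b', 'c']

# Index 0 is reserved for the FST start state, so token ids begin at 1,
# mirroring char_map_[char_list_[i]] = i + 1 in decoders/swig/scorer.cpp.
char_map = {ch: i + 1 for i, ch in enumerate(char_list)}
space_id = char_list.index(' ')      # kept 0-based, as SPACE_ID_ is in the C++ code
print(char_map, space_id + 1)        # the FST sees the space token as space_id + 1
```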
--- .travis.yml | 16 +++++++++++----- .travis/unittest.sh | 9 +-------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index eadcb03b3..52bfd5a1b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,20 +17,26 @@ addons: - python-pip - python2.7-dev ssh_known_hosts: 52.76.173.135 + before_install: - sudo pip install -U virtualenv pre-commit pip - docker pull paddlepaddle/paddle:latest + script: - - .travis/precommit.sh - - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c - 'cd /py_unittest; sh .travis/unittest.sh' || exit $? + - exit_code=0 + - .travis/precommit.sh || exit_code=$(( exit_code | $? )) + - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c + 'cd /py_unittest; sh .travis/unittest.sh' || exit_code=$(( exit_code | $? )) - | - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; - if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit 0; fi; + if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit $exit_code; fi; + if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit $exit_code; fi; export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh export MODELS_DIR=`pwd` cd .. curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $MODELS_DIR + exit_code=$(( exit_code | $? )) + exit $exit_code + notifications: email: on_success: change diff --git a/.travis/unittest.sh b/.travis/unittest.sh index 4195a441e..f27dc481a 100755 --- a/.travis/unittest.sh +++ b/.travis/unittest.sh @@ -24,13 +24,6 @@ unittest(){ trap 'abort' 0 set -e -for proj in */ ; do - if [ -d $proj ]; then - unittest $proj - if [ $? != 0 ]; then - exit 1 - fi - fi -done +unittest . trap : 0 From 3ea19973c66a6a10320888ba47a8857bebf5abfa Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 17 Nov 2017 17:19:53 +0800 Subject: [PATCH 286/335] add more comments to explain the modification --- .clang_format.hook | 2 +- decoders/swig/scorer.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.clang_format.hook b/.clang_format.hook index 40d70f56c..8141fffb3 100755 --- a/.clang_format.hook +++ b/.clang_format.hook @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -readonly VERSION="3.8" +readonly VERSION="3.6" version=$(clang-format -version) diff --git a/decoders/swig/scorer.cpp b/decoders/swig/scorer.cpp index 39da13d16..27b61cd03 100644 --- a/decoders/swig/scorer.cpp +++ b/decoders/swig/scorer.cpp @@ -149,11 +149,15 @@ void Scorer::set_char_map(const std::vector& char_list) { char_list_ = char_list; char_map_.clear(); + // Set the char map for the FST for spelling correction for (size_t i = 0; i < char_list_.size(); i++) { if (char_list_[i] == " ") { SPACE_ID_ = i; } - char_map_[char_list_[i]] = i + 1; // Force index starting from zero + // The initial state of FST is state 0, hence the index of chars in + // the FST should start from 1 to avoid the conflict with the initial + // state, otherwise wrong decoding results would be given. 
+ char_map_[char_list_[i]] = i + 1; } } From dd770948a0cc71da4f96a0fd446deec0b631a369 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 17 Nov 2017 17:20:44 +0800 Subject: [PATCH 287/335] revert clang_format version --- .clang_format.hook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang_format.hook b/.clang_format.hook index 8141fffb3..4cbc972bb 100755 --- a/.clang_format.hook +++ b/.clang_format.hook @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -readonly VERSION="3.6" +readonly VERSION="3.9" version=$(clang-format -version) From bb637c1d395b55842bf9ecc6d8f346d7e0919e51 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 17 Nov 2017 18:34:45 +0800 Subject: [PATCH 288/335] remove doc deploy in travis-ci --- .travis.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 52bfd5a1b..75c2c1351 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,6 @@ addons: - python - python-pip - python2.7-dev - ssh_known_hosts: 52.76.173.135 before_install: - sudo pip install -U virtualenv pre-commit pip @@ -27,14 +26,6 @@ script: - .travis/precommit.sh || exit_code=$(( exit_code | $? )) - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c 'cd /py_unittest; sh .travis/unittest.sh' || exit_code=$(( exit_code | $? )) - - | - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit $exit_code; fi; - if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit $exit_code; fi; - export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh - export MODELS_DIR=`pwd` - cd .. - curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $MODELS_DIR - exit_code=$(( exit_code | $? )) exit $exit_code notifications: From adc117312f975eb8558c2d052b8446c676918cd8 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 20 Nov 2017 11:02:04 +0800 Subject: [PATCH 289/335] Refine doc and fix path for run_data.sh --- README.md | 2 +- data/voxforge/run_data.sh | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7ceae17f2..eb16dd587 100644 --- a/README.md +++ b/README.md @@ -506,7 +506,7 @@ VoxForge European | 31.21 | 20.47 VoxForge Indian | 56.79 | 28.15 Baidu Internal Testset  |   47.73 |   8.92 -For reproducing benchmark results on VoxForge data, we provide a script to generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updated and the generated manifest files may have difference from those we evaluated. +For reproducing benchmark results on VoxForge data, we provide a script to download data and generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updating and the generated manifest files may have difference from those we evaluated on. #### Benchmark Results for Mandarin Model (Character Error Rate) diff --git a/data/voxforge/run_data.sh b/data/voxforge/run_data.sh index e0a9f1b3f..c6ff71118 100644 --- a/data/voxforge/run_data.sh +++ b/data/voxforge/run_data.sh @@ -1,10 +1,8 @@ #! /usr/bin/env bash -cd ../.. 
> /dev/null - # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python data/voxforge/voxforge.py \ ---manifest_prefix='data/voxforge/manifest' \ +PYTHONPATH=../../:$PYTHONPATH python voxforge.py \ +--manifest_prefix='./manifest' \ --target_dir='~/.cache/paddle/dataset/speech/VoxForge' \ --is_merge_dialect=True \ --dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian' From a200271ba9e436be28805d30296171d8cf7fbc90 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 20 Nov 2017 19:13:48 +0800 Subject: [PATCH 290/335] Update libri model. --- README.md | 16 ++++++++-------- models/librispeech/download_model.sh | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a05289b9b..27dede48a 100644 --- a/README.md +++ b/README.md @@ -479,7 +479,7 @@ python deploy/demo_client.py --help Language | Model Name | Training Data | Hours of Speech :-----------: | :------------: | :----------: | -------: -English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h Mandarin | [BaiduCN1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h @@ -498,13 +498,13 @@ Language Model | Training Data | Token-based | Size | Descriptions Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: -LibriSpeech Test-Clean | 7.77 | 6.63 -LibriSpeech Test-Other | 23.25 | 16.59 -VoxForge American-Canadian | 12.52 |   7.46 -VoxForge Commonwealth | 21.08 | 16.23 -VoxForge European | 31.21 | 20.47 -VoxForge Indian | 56.79 | 28.15 -Baidu Internal Testset  |   47.73 |   8.92 +LibriSpeech Test-Clean | 7.73 | 6.63 +LibriSpeech Test-Other | 23.15 | 16.59 +VoxForge American-Canadian | 12.30 |   7.46 +VoxForge Commonwealth | 20.03 | 16.23 +VoxForge European | 30.31 | 20.47 +VoxForge Indian | 55.47 | 28.15 +Baidu Internal Testset  |   44.71 |   8.92 For reproducing benchmark results on VoxForge data, we provide a script to download data and generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updating and the generated manifest files may have difference from those we evaluated on. diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 9c0ec2783..305c082ad 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -2,8 +2,8 @@ . ../../utils/utility.sh -URL='http://cloud.dlnel.org/filepub/?uuid=6020a634-5399-4423-b021-c5ed32680fff' -MD5=2ef08f8b608a7c555592161fc14d81a6 +URL='http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6' +MD5=1f72d0c5591f453362f0caa09dd57618 TARGET=./librispeech_model.tar.gz From 234f2bb49d94bf42899f7b11956ef472a398faaa Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 21 Nov 2017 11:22:59 +0800 Subject: [PATCH 291/335] Adapt demo_server.py to support padding removing. 
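Since padding removal makes the network consume explicit sequence offsets, lengths and index ranges, the demo server below has to compute the post-convolution feature sizes itself, just as `DataGenerator` and `tools/tune.py` already do. The arithmetic follows the `(size - 1) // stride + 1` rule visible in the diffs: the first convolution strides one spectrogram axis by 2 and the other by 3, and every later convolution halves the first axis again. A hedged sketch with a made-up input shape:

```python
def conv_out(size, stride):
    # output length of a strided convolution with 'same'-style padding
    return (size - 1) // stride + 1

h, w = 161, 200                  # illustrative spectrogram shape only
num_conv_layers = 2              # the default --num_conv_layers

conv0_h, conv0_w = conv_out(h, 2), conv_out(w, 3)
shapes = [(conv0_h, conv0_w)]
for _ in range(num_conv_layers - 1):
    # later conv layers keep conv0_w and halve the other axis again
    shapes.append((conv_out(shapes[-1][0], 2), conv0_w))

print(shapes)                    # [(81, 67), (41, 67)] for the shape above
```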
--- deploy/demo_server.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index 3e81c0c5b..bb339b761 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -147,7 +147,8 @@ def start_server(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1, - keep_transcription_text=True) + keep_transcription_text=True, + num_conv_layers=args.num_conv_layers) # prepare ASR model ds2_model = DeepSpeech2Model( vocab_size=data_generator.vocab_size, @@ -163,8 +164,20 @@ def start_server(): # prepare ASR inference handler def file_to_transcript(filename): feature = data_generator.process_utterance(filename, "") + ins = [] + conv0_h = (feature[0].shape[0] - 1) // 2 + 1 + conv0_w = (feature[0].shape[1] - 1) // 3 + 1 + ins += [feature[0], feature[1], + [0], [conv0_w], + [1, 32, 1, conv0_h, conv0_w + 1, conv0_w]] + pre_h = conv0_h + for i in xrange(args.num_conv_layers - 1): + h = (pre_h - 1) // 2 + 1 + pre_h = h + ins += [[1, 32, 1, h, conv0_w + 1, conv0_w]] + result_transcript = ds2_model.infer_batch( - infer_data=[feature], + infer_data=[ins], decoding_method=args.decoding_method, beam_alpha=args.alpha, beam_beta=args.beta, @@ -173,7 +186,8 @@ def start_server(): cutoff_top_n=args.cutoff_top_n, vocab_list=vocab_list, language_model_path=args.lang_model_path, - num_processes=1) + num_processes=1, + feeding_dict=data_generator.feeding) return result_transcript[0] # warming up with utterrances sampled from Librispeech From 5ba0e0a00bf9afb12e6ea3ae2056d8f73d21c12b Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 21 Nov 2017 13:35:42 +0800 Subject: [PATCH 292/335] update setup in readme --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 27dede48a..0ba9b86e3 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,9 @@ To avoid the trouble of environment setup, [running in docker container](#runnin ### Setup ```bash -git clone https://github.com/PaddlePaddle/models.git -cd models/deep_speech_2 +sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev swig +git clone https://github.com/PaddlePaddle/DeepSpeech.git +cd DeepSpeech sh setup.sh ``` From 74e00f4e15706e14d24ea5a169ee5c9eac4b30c2 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 21 Nov 2017 16:55:12 +0800 Subject: [PATCH 293/335] add more info in the setup section --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 0ba9b86e3..08a3afa37 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,15 @@ To avoid the trouble of environment setup, [running in docker container](#runnin - PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation)) ### Setup +- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis` and `swig`, e.g. 
installing them via `apt-get`: ```bash sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev swig +``` + +- Run the setup script for the remaining dependencies + +```bash git clone https://github.com/PaddlePaddle/DeepSpeech.git cd DeepSpeech sh setup.sh From 61177a10b2eec315d36b1089ff09bce7bc851f94 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 21 Nov 2017 17:13:12 +0800 Subject: [PATCH 294/335] update the rebuilt docker repo's name in doc --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8e5485fcb..81467b24d 100644 --- a/README.md +++ b/README.md @@ -351,19 +351,19 @@ Take several steps to launch the Docker image: - Download the Docker image ```bash -sudo nvidia-docker pull paddlepaddle/models:deep-speech-2 +nvidia-docker pull paddlepaddle/deep_speech:latest-gpu ``` - Clone this repository ``` -git clone https://github.com/PaddlePaddle/models.git +git clone https://github.com/PaddlePaddle/DeepSpeech.git ``` - Run the Docker image ```bash -sudo nvidia-docker run -it -v $(pwd)/models:/models paddlepaddle/models:deep-speech-2 /bin/bash +sudo nvidia-docker run -it -v $(pwd)/DeepSpeech:/DeepSpeech paddlepaddle/deep_speech:latest-gpu /bin/bash ``` Now go back and start from the [Getting Started](#getting-started) section, you can execute training, inference and hyper-parameters tuning similarly in the Docker container. From cd5f558bc7ecc488624278f45f9f9edec80fca3b Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 29 Nov 2017 21:30:53 +0800 Subject: [PATCH 295/335] Add library boost to the dependency --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 81467b24d..b54ed876f 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,10 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin - PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation)) ### Setup -- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis` and `swig`, e.g. installing them via `apt-get`: +- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost` and `swig`, e.g. installing them via `apt-get`: ```bash -sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev swig +sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig ``` - Run the setup script for the remaining dependencies From a8793039f3cd29959a8110ab22c45a32d4b52587 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 1 Dec 2017 19:36:07 +0800 Subject: [PATCH 296/335] Expose edit distance for error_rate.py --- utils/error_rate.py | 100 +++++++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 29 deletions(-) diff --git a/utils/error_rate.py b/utils/error_rate.py index ea829f470..2ff3f6960 100644 --- a/utils/error_rate.py +++ b/utils/error_rate.py @@ -56,6 +56,70 @@ def _levenshtein_distance(ref, hyp): return distance[m % 2][n] +def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): + """Compute the levenshtein distance between reference sequence and + hypothesis sequence in word-level. + + :param reference: The reference sentence. + :type reference: basestring + :param hypothesis: The hypothesis sentence. + :type hypothesis: basestring + :param ignore_case: Whether case-sensitive or not. + :type ignore_case: bool + :param delimiter: Delimiter of input sentences. 
+ :type delimiter: char + :return: Levenshtein distance and word number of reference sentence. + :rtype: list + :raises ValueError: If word number of reference sentence is zero. + """ + if ignore_case == True: + reference = reference.lower() + hypothesis = hypothesis.lower() + + ref_words = filter(None, reference.split(delimiter)) + hyp_words = filter(None, hypothesis.split(delimiter)) + + if len(ref_words) == 0: + raise ValueError("Reference's word number should be greater than 0.") + + edit_distance = _levenshtein_distance(ref_words, hyp_words) + return float(edit_distance), len(ref_words) + + +def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): + """Compute the levenshtein distance between reference sequence and + hypothesis sequence in char-level. + + :param reference: The reference sentence. + :type reference: basestring + :param hypothesis: The hypothesis sentence. + :type hypothesis: basestring + :param ignore_case: Whether case-sensitive or not. + :type ignore_case: bool + :param remove_space: Whether remove internal space characters + :type remove_space: bool + :return: Levenshtein distance and length of reference sentence. + :rtype: list + :raises ValueError: If the reference length is zero. + """ + if ignore_case == True: + reference = reference.lower() + hypothesis = hypothesis.lower() + + join_char = ' ' + if remove_space == True: + join_char = '' + + reference = join_char.join(filter(None, reference.split(' '))) + hypothesis = join_char.join(filter(None, hypothesis.split(' '))) + + if len(reference) == 0: + raise ValueError("Length of reference should be greater than 0.") + + edit_distance = _levenshtein_distance(reference, hypothesis) + return float(edit_distance), len(reference) + + def wer(reference, hypothesis, ignore_case=False, delimiter=' '): """Calculate word error rate (WER). WER compares reference text and hypothesis text in word-level. WER is defined as: @@ -85,20 +149,11 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): :type delimiter: char :return: Word error rate. :rtype: float - :raises ValueError: If the reference length is zero. + :raises ValueError: If word number of reference is zero. """ - if ignore_case == True: - reference = reference.lower() - hypothesis = hypothesis.lower() - - ref_words = filter(None, reference.split(delimiter)) - hyp_words = filter(None, hypothesis.split(delimiter)) - - if len(ref_words) == 0: - raise ValueError("Reference's word number should be greater than 0.") - - edit_distance = _levenshtein_distance(ref_words, hyp_words) - wer = float(edit_distance) / len(ref_words) + edit_distance, ref_len = word_errors(reference, hypothesis, ignore_case, + delimiter) + wer = float(edit_distance) / ref_len return wer @@ -135,20 +190,7 @@ def cer(reference, hypothesis, ignore_case=False, remove_space=False): :rtype: float :raises ValueError: If the reference length is zero. 
""" - if ignore_case == True: - reference = reference.lower() - hypothesis = hypothesis.lower() - - join_char = ' ' - if remove_space == True: - join_char = '' - - reference = join_char.join(filter(None, reference.split(' '))) - hypothesis = join_char.join(filter(None, hypothesis.split(' '))) - - if len(reference) == 0: - raise ValueError("Length of reference should be greater than 0.") - - edit_distance = _levenshtein_distance(reference, hypothesis) - cer = float(edit_distance) / len(reference) + edit_distance, ref_len = char_errors(reference, hypothesis, ignore_case, + remove_space) + cer = float(edit_distance) / len(ref_len) return cer From 0f9b3ebf0e75ed16e4748717589b962ec4747576 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 1 Dec 2017 19:46:29 +0800 Subject: [PATCH 297/335] Move exception throwing logic to cer and wer. --- utils/error_rate.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/utils/error_rate.py b/utils/error_rate.py index 2ff3f6960..9aa900174 100644 --- a/utils/error_rate.py +++ b/utils/error_rate.py @@ -70,7 +70,6 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): :type delimiter: char :return: Levenshtein distance and word number of reference sentence. :rtype: list - :raises ValueError: If word number of reference sentence is zero. """ if ignore_case == True: reference = reference.lower() @@ -79,9 +78,6 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): ref_words = filter(None, reference.split(delimiter)) hyp_words = filter(None, hypothesis.split(delimiter)) - if len(ref_words) == 0: - raise ValueError("Reference's word number should be greater than 0.") - edit_distance = _levenshtein_distance(ref_words, hyp_words) return float(edit_distance), len(ref_words) @@ -100,7 +96,6 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): :type remove_space: bool :return: Levenshtein distance and length of reference sentence. :rtype: list - :raises ValueError: If the reference length is zero. 
""" if ignore_case == True: reference = reference.lower() @@ -113,9 +108,6 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): reference = join_char.join(filter(None, reference.split(' '))) hypothesis = join_char.join(filter(None, hypothesis.split(' '))) - if len(reference) == 0: - raise ValueError("Length of reference should be greater than 0.") - edit_distance = _levenshtein_distance(reference, hypothesis) return float(edit_distance), len(reference) @@ -153,6 +145,10 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): """ edit_distance, ref_len = word_errors(reference, hypothesis, ignore_case, delimiter) + + if ref_len == 0: + raise ValueError("Reference's word number should be greater than 0.") + wer = float(edit_distance) / ref_len return wer @@ -192,5 +188,9 @@ def cer(reference, hypothesis, ignore_case=False, remove_space=False): """ edit_distance, ref_len = char_errors(reference, hypothesis, ignore_case, remove_space) - cer = float(edit_distance) / len(ref_len) + + if ref_len == 0: + raise ValueError("Length of reference should be greater than 0.") + + cer = float(edit_distance) / ref_len return cer From ae796a9dab6424cee9b29efed5223a96736f0611 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 4 Dec 2017 13:39:56 +0800 Subject: [PATCH 298/335] Correct the error rate's computation for multiple sentences --- test.py | 14 ++++++++------ tools/tune.py | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/test.py b/test.py index 53f7e17af..224cea9b6 100644 --- a/test.py +++ b/test.py @@ -8,7 +8,7 @@ import functools import paddle.v2 as paddle from data_utils.data import DataGenerator from model_utils.model import DeepSpeech2Model -from utils.error_rate import wer, cer +from utils.error_rate import char_errors, word_errors from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) @@ -91,8 +91,8 @@ def evaluate(): # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] - error_rate_func = cer if args.error_rate_type == 'cer' else wer - error_sum, num_ins = 0.0, 0 + errors_func = char_errors if args.error_rate_type == 'cer' else word_errors + errors_sum, len_refs, num_ins = 0.0, 0, 0 for infer_data in batch_reader(): result_transcripts = ds2_model.infer_batch( infer_data=infer_data, @@ -108,12 +108,14 @@ def evaluate(): feeding_dict=data_generator.feeding) target_transcripts = [data[1] for data in infer_data] for target, result in zip(target_transcripts, result_transcripts): - error_sum += error_rate_func(target, result) + errors, len_ref = errors_func(target, result) + errors_sum += errors + len_refs += len_ref num_ins += 1 print("Error rate [%s] (%d/?) 
= %f" % - (args.error_rate_type, num_ins, error_sum / num_ins)) + (args.error_rate_type, num_ins, errors_sum / len_refs)) print("Final error rate [%s] (%d/%d) = %f" % - (args.error_rate_type, num_ins, num_ins, error_sum / num_ins)) + (args.error_rate_type, num_ins, num_ins, errors_sum / len_refs)) ds2_model.logger.info("finish evaluation") diff --git a/tools/tune.py b/tools/tune.py index 47abf1413..b13233195 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -16,7 +16,7 @@ from data_utils.data import DataGenerator from decoders.swig_wrapper import Scorer from decoders.swig_wrapper import ctc_beam_search_decoder_batch from model_utils.model import deep_speech_v2_network -from utils.error_rate import wer, cer +from utils.error_rate import char_errors, word_errors from utils.utility import add_arguments, print_arguments parser = argparse.ArgumentParser(description=__doc__) @@ -158,7 +158,7 @@ def tune(): " dict_size = %d" % ext_scorer.get_dict_size()) logger.info("end initializing scorer. Start tuning ...") - error_rate_func = cer if args.error_rate_type == 'cer' else wer + errors_func = char_errors if args.error_rate_type == 'cer' else word_errors # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) @@ -167,7 +167,7 @@ def tune(): err_sum = [0.0 for i in xrange(len(params_grid))] err_ave = [0.0 for i in xrange(len(params_grid))] - num_ins, cur_batch = 0, 0 + num_ins, len_refs, cur_batch = 0, 0, 0 ## incremental tuning parameters over multiple batches for infer_data in batch_reader(): if (args.num_batches >= 0) and (cur_batch >= args.num_batches): @@ -200,8 +200,14 @@ def tune(): result_transcripts = [res[0][1] for res in beam_search_results] for target, result in zip(target_transcripts, result_transcripts): - err_sum[index] += error_rate_func(target, result) - err_ave[index] = err_sum[index] / num_ins + errors, len_ref = errors_func(target, result) + err_sum[index] += errors + # accumulate the length of references of every batch + # in the first iteration + if args.alpha_from == alpha and args.beta_from == beta: + len_refs += len_ref + + err_ave[index] = err_sum[index] / len_refs if index % 2 == 0: sys.stdout.write('.') sys.stdout.flush() @@ -226,7 +232,7 @@ def tune(): err_ave_min = min(err_ave) min_index = err_ave.index(err_ave_min) print("\nFinish tuning on %d batches, final opt (alpha, beta) = (%s, %s)" - % (args.num_batches, "%.3f" % params_grid[min_index][0], + % (cur_batch, "%.3f" % params_grid[min_index][0], "%.3f" % params_grid[min_index][1])) logger.info("finish tuning") From 938685baa033c4bb3af5db13e5befe45d3a55e02 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 4 Dec 2017 14:02:48 +0800 Subject: [PATCH 299/335] Add infer & test script for baidu_en8k model --- examples/baidu_en8k/run_infer_golden.sh | 55 +++++++++++++++++++++++++ examples/baidu_en8k/run_test_golden.sh | 55 +++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 examples/baidu_en8k/run_infer_golden.sh create mode 100644 examples/baidu_en8k/run_test_golden.sh diff --git a/examples/baidu_en8k/run_infer_golden.sh b/examples/baidu_en8k/run_infer_golden.sh new file mode 100644 index 000000000..68cf2fc9f --- /dev/null +++ b/examples/baidu_en8k/run_infer_golden.sh @@ -0,0 +1,55 @@ +#! /usr/bin/env bash + +cd ../.. > /dev/null + +# download language model +cd models/lm > /dev/null +sh download_lm_en.sh +if [ $? 
-ne 0 ]; then + exit 1 +fi +cd - > /dev/null + + +# download well-trained model +cd models/baidu_en8k > /dev/null +sh download_model.sh +if [ $? -ne 0 ]; then + exit 1 +fi +cd - > /dev/null + + +# infer +CUDA_VISIBLE_DEVICES=0 \ +python -u infer.py \ +--num_samples=10 \ +--trainer_count=1 \ +--beam_size=500 \ +--num_proc_bsearch=5 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--alpha=1.4 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ +--cutoff_top_n=40 \ +--use_gru=True \ +--use_gpu=True \ +--share_rnn_weights=False \ +--infer_manifest='data/librispeech/manifest.test-clean' \ +--mean_std_path='models/baidu_en8k/mean_std.npz' \ +--vocab_path='models/baidu_en8k/vocab.txt' \ +--model_path='models/baidu_en8k/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in inference!" + exit 1 +fi + + +exit 0 diff --git a/examples/baidu_en8k/run_test_golden.sh b/examples/baidu_en8k/run_test_golden.sh new file mode 100644 index 000000000..4c4ac0aee --- /dev/null +++ b/examples/baidu_en8k/run_test_golden.sh @@ -0,0 +1,55 @@ +#! /usr/bin/env bash + +cd ../.. > /dev/null + +# download language model +cd models/lm > /dev/null +#sh download_lm_en.sh +#if [ $? -ne 0 ]; then +# exit 1 +#fi +cd - > /dev/null + + +# download well-trained model +cd models/baidu_en8k > /dev/null +#sh download_model.sh +#if [ $? -ne 0 ]; then +# exit 1 +#fi +cd - > /dev/null + + +# evaluate model +CUDA_VISIBLE_DEVICES=0,1,2,3 \ +python -u test.py \ +--batch_size=128 \ +--trainer_count=4 \ +--beam_size=500 \ +--num_proc_bsearch=8 \ +--num_proc_data=8 \ +--num_conv_layers=2 \ +--num_rnn_layers=3 \ +--rnn_layer_size=1024 \ +--alpha=1.4 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ +--cutoff_top_n=40 \ +--use_gru=True \ +--use_gpu=True \ +--share_rnn_weights=False \ +--test_manifest='data/librispeech/manifest.test-clean' \ +--mean_std_path='models/baidu_en8k/mean_std.npz' \ +--vocab_path='models/baidu_en8k/vocab.txt' \ +--model_path='models/baidu_en8k/params.tar.gz' \ +--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \ +--decoding_method='ctc_beam_search' \ +--error_rate_type='wer' \ +--specgram_type='linear' + +if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 +fi + +exit 0 From c786b18265dd3cae1ee0a064afd49a5123b418ad Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 4 Dec 2017 14:30:58 +0800 Subject: [PATCH 300/335] Uncomment model downloading lines --- examples/baidu_en8k/run_test_golden.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/baidu_en8k/run_test_golden.sh b/examples/baidu_en8k/run_test_golden.sh index 4c4ac0aee..b471ac65d 100644 --- a/examples/baidu_en8k/run_test_golden.sh +++ b/examples/baidu_en8k/run_test_golden.sh @@ -4,19 +4,19 @@ cd ../.. > /dev/null # download language model cd models/lm > /dev/null -#sh download_lm_en.sh -#if [ $? -ne 0 ]; then -# exit 1 -#fi +sh download_lm_en.sh +if [ $? -ne 0 ]; then + exit 1 +fi cd - > /dev/null # download well-trained model cd models/baidu_en8k > /dev/null -#sh download_model.sh -#if [ $? -ne 0 ]; then -# exit 1 -#fi +sh download_model.sh +if [ $? 
-ne 0 ]; then + exit 1 +fi cd - > /dev/null From 6e4f2465171cb6ac9186d146d8a85a4745096b4d Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 4 Dec 2017 18:07:47 +0800 Subject: [PATCH 301/335] retune hyper-parameters for librispeech model --- examples/librispeech/run_infer.sh | 4 ++-- examples/librispeech/run_infer_golden.sh | 4 ++-- examples/librispeech/run_test.sh | 4 ++-- examples/librispeech/run_test_golden.sh | 4 ++-- examples/tiny/run_infer.sh | 4 ++-- examples/tiny/run_infer_golden.sh | 4 ++-- examples/tiny/run_test.sh | 4 ++-- examples/tiny/run_test_golden.sh | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/librispeech/run_infer.sh b/examples/librispeech/run_infer.sh index 2df5b6cc4..44b97bacf 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/librispeech/run_infer.sh @@ -21,8 +21,8 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh index c407cabe4..173790903 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/librispeech/run_infer_golden.sh @@ -30,8 +30,8 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index 0a76704d7..11cd74116 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -22,8 +22,8 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index 3e7e3b4c5..41dbc0dae 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -31,8 +31,8 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index 3a345f2ff..0cc140c8e 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -21,8 +21,8 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh index 72a8be064..cf9aa84c9 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/tiny/run_infer_golden.sh @@ -30,8 +30,8 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index d33863222..a9fe5b936 100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -22,8 +22,8 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ 
--use_gru=False \ diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 8d3d25c5c..e87ce6eef 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -31,8 +31,8 @@ python -u test.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=2.15 \ ---beta=0.35 \ +--alpha=2.5 \ +--beta=0.3 \ --cutoff_prob=1.0 \ --cutoff_top_n=40 \ --use_gru=False \ From ccb4332fe3009b8cdc39383bba9599aced671e64 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 4 Dec 2017 18:10:11 +0800 Subject: [PATCH 302/335] update benchmark result for English model --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b54ed876f..41307b045 100644 --- a/README.md +++ b/README.md @@ -505,13 +505,13 @@ Language Model | Training Data | Token-based | Size | Descriptions Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: -LibriSpeech Test-Clean | 7.73 | 6.63 -LibriSpeech Test-Other | 23.15 | 16.59 -VoxForge American-Canadian | 12.30 |   7.46 -VoxForge Commonwealth | 20.03 | 16.23 -VoxForge European | 30.31 | 20.47 -VoxForge Indian | 55.47 | 28.15 -Baidu Internal Testset  |   44.71 |   8.92 +LibriSpeech Test-Clean | 6.85 | 5.73 +LibriSpeech Test-Other | 21.18 | 14.47 +VoxForge American-Canadian | 12.12 |   7.37 +VoxForge Commonwealth | 19.82 | 15.58 +VoxForge European | 30.15 | 19.44 +VoxForge Indian | 53.73 | 26.15 +Baidu Internal Testset  |   40.75 |   8.82 For reproducing benchmark results on VoxForge data, we provide a script to download data and generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updating and the generated manifest files may have difference from those we evaluated on. From 20e225875c9c877aafb5b0f254d0ccf4de04afb4 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 5 Dec 2017 14:14:58 +0800 Subject: [PATCH 303/335] Simplify parallel part for data processing and fix abnormal exit. --- data_utils/data.py | 5 +--- data_utils/utility.py | 68 ++++++++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 9dd2a91f6..af6734f7e 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -290,10 +290,7 @@ class DataGenerator(object): reader, cleanup_callback = xmap_readers_mp( lambda instance: self.process_utterance(instance["audio_filepath"], instance["text"]), - reader, - self._num_threads, - 4096, - order=True) + reader, self._num_threads, 4096) # register callback to main process atexit.register(cleanup_callback) diff --git a/data_utils/utility.py b/data_utils/utility.py index 2633e1b42..89a74c41a 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -10,7 +10,7 @@ import tarfile import time from Queue import Queue from threading import Thread -from multiprocessing import Process, Manager +from multiprocessing import Process, Manager, Value from paddle.v2.dataset.common import md5file @@ -101,40 +101,35 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): :type process_num: int :param buffer_size: Maximal buffer size. :type buffer_size: int - :param order: Reserve the order of samples from the given reader. 
- :type order: bool - :return: The wrappered reader - :rtype: callable + :return: The wrappered reader and cleanup callback + :rtype: tuple """ end_flag = XmapEndSignal() - # define a worker to read samples from reader to in_queue - def read_worker(reader, in_queue): - for sample in reader(): - in_queue.put(sample) - in_queue.put(end_flag) + read_workers = [] + handle_workers = [] + flush_workers = [] + + read_exit_flag = Value('i', 0) + handle_exit_flag = Value('i', 0) + flush_exit_flag = Value('i', 0) # define a worker to read samples from reader to in_queue with order flag def order_read_worker(reader, in_queue): for order_id, sample in enumerate(reader()): + if read_exit_flag.value == 1: break in_queue.put((order_id, sample)) in_queue.put(end_flag) - - # define a worker to handle samples from in_queue by mapper and put results - # to out_queue - def handle_worker(in_queue, out_queue, mapper): - sample = in_queue.get() - while not isinstance(sample, XmapEndSignal): - out_queue.put(mapper(sample)) - sample = in_queue.get() - in_queue.put(end_flag) - out_queue.put(end_flag) + # the reading worker should not exit until all handling work exited + while handle_exit_flag.value == 0 or read_exit_flag.value == 0: + time.sleep(0.001) # define a worker to handle samples from in_queue by mapper and put results # to out_queue with order def order_handle_worker(in_queue, out_queue, mapper, out_order): ins = in_queue.get() while not isinstance(ins, XmapEndSignal): + if handle_exit_flag.value == 1: break order_id, sample = ins result = mapper(sample) while order_id != out_order[0]: @@ -144,22 +139,39 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): ins = in_queue.get() in_queue.put(end_flag) out_queue.put(end_flag) + # wait for exit of flushing worker + while flush_exit_flag.value == 0 or handle_exit_flag.value == 0: + time.sleep(0.001) + read_exit_flag.value = 1 + handle_exit_flag.value = 1 # define a thread worker to flush samples from Manager.Queue to Queue # for acceleration def flush_worker(in_queue, out_queue): finish = 0 - while finish < process_num: + while finish < process_num and flush_exit_flag.value == 0: sample = in_queue.get() if isinstance(sample, XmapEndSignal): finish += 1 else: out_queue.put(sample) out_queue.put(end_flag) + handle_exit_flag.value = 1 + flush_exit_flag.value = 1 def cleanup(): - # kill all sub process and threads - os._exit(0) + # first exit flushing workers + flush_exit_flag.value = 1 + for w in flush_workers: + w.join() + # next exit handling workers + handle_exit_flag.value = 1 + for w in handle_workers: + w.join() + # last exit reading workers + read_exit_flag.value = 1 + for w in read_workers: + w.join() def xreader(): # prepare shared memory @@ -169,27 +181,29 @@ def xmap_readers_mp(mapper, reader, process_num, buffer_size, order=False): out_order = manager.list([0]) # start a read worker in a process - target = order_read_worker if order else read_worker + target = order_read_worker p = Process(target=target, args=(reader, in_queue)) p.daemon = True p.start() + read_workers.append(p) # start handle_workers with multiple processes - target = order_handle_worker if order else handle_worker - args = (in_queue, out_queue, mapper, out_order) if order else ( - in_queue, out_queue, mapper) + target = order_handle_worker + args = (in_queue, out_queue, mapper, out_order) workers = [ Process(target=target, args=args) for _ in xrange(process_num) ] for w in workers: w.daemon = True w.start() + handle_workers.append(w) # start a thread to 
read data from slow Manager.Queue flush_queue = Queue(buffer_size) t = Thread(target=flush_worker, args=(out_queue, flush_queue)) t.daemon = True t.start() + flush_workers.append(t) # get results sample = flush_queue.get() From 23e44830699b0599f850e0905fe6d29349f92f2a Mon Sep 17 00:00:00 2001 From: lispc Date: Fri, 8 Dec 2017 20:20:39 +0800 Subject: [PATCH 304/335] fix a comment in audio_featurizer.py --- data_utils/featurizer/audio_featurizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index f594de7d9..0a54701bf 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -112,7 +112,7 @@ class AudioFeaturizer(object): if max_freq is None: max_freq = sample_rate / 2 if max_freq > sample_rate / 2: - raise ValueError("max_freq must be greater than half of " + raise ValueError("max_freq must not be greater than half of " "sample rate.") if stride_ms > window_ms: raise ValueError("Stride size must not be greater than " From b8d1e70549877e9fd17e0a24f3b1be7e1f8e08c1 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 8 Dec 2017 20:48:47 +0800 Subject: [PATCH 305/335] Update default hyper-params of scorer in python scripts --- deploy/demo_server.py | 4 ++-- infer.py | 4 ++-- test.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index bb339b761..4344b40d3 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -23,8 +23,8 @@ add_arg('beam_size', int, 500, "Beam search width.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('alpha', float, 2.15, "Coef of LM for beam search.") -add_arg('beta', float, 0.35, "Coef of WC for beam search.") +add_arg('alpha', float, 2.5, "Coef of LM for beam search.") +add_arg('beta', float, 0.3, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") diff --git a/infer.py b/infer.py index 32d15f126..fd725db97 100644 --- a/infer.py +++ b/infer.py @@ -21,8 +21,8 @@ add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('alpha', float, 2.15, "Coef of LM for beam search.") -add_arg('beta', float, 0.35, "Coef of WC for beam search.") +add_arg('alpha', float, 2.5, "Coef of LM for beam search.") +add_arg('beta', float, 0.3, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") diff --git a/test.py b/test.py index 224cea9b6..df7be1a6d 100644 --- a/test.py +++ b/test.py @@ -22,8 +22,8 @@ add_arg('num_proc_data', int, 8, "# of CPUs for data preprocessing.") add_arg('num_conv_layers', int, 2, "# of convolution layers.") add_arg('num_rnn_layers', int, 3, "# of recurrent layers.") add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.") -add_arg('alpha', float, 2.15, "Coef of LM for beam search.") -add_arg('beta', float, 
0.35, "Coef of WC for beam search.") +add_arg('alpha', float, 2.5, "Coef of LM for beam search.") +add_arg('beta', float, 0.3, "Coef of WC for beam search.") add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.") From d3dfc3dd238cdac5b60f81680d1b1174059a174d Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 10 Dec 2017 15:29:39 +0800 Subject: [PATCH 306/335] Decouple data provider from model configuration. --- data_utils/data.py | 40 ++------------- deploy/demo_server.py | 16 +----- model_utils/model.py | 114 +++++++++++++++++++++++++++++++++++++++--- test.py | 3 +- train.py | 6 +-- 5 files changed, 114 insertions(+), 65 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index af6734f7e..2a6e99b75 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -60,9 +60,6 @@ class DataGenerator(object): be passed forward directly without converting to index sequence. :type keep_transcription_text: bool - :param num_conv_layers: The number of convolution layer, used to compute - the sequence length. - :type num_conv_layers: int """ def __init__(self, @@ -78,8 +75,7 @@ class DataGenerator(object): use_dB_normalization=True, num_threads=multiprocessing.cpu_count() // 2, random_seed=0, - keep_transcription_text=False, - num_conv_layers=2): + keep_transcription_text=False): self._max_duration = max_duration self._min_duration = min_duration self._normalizer = FeatureNormalizer(mean_std_filepath) @@ -100,7 +96,6 @@ class DataGenerator(object): self._local_data = local() self._local_data.tar2info = {} self._local_data.tar2object = {} - self._num_conv_layers = num_conv_layers def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. @@ -219,14 +214,7 @@ class DataGenerator(object): :return: Data feeding dict. 
:rtype: dict """ - feeding_dict = { - "audio_spectrogram": 0, - "transcript_text": 1, - "sequence_offset": 2, - "sequence_length": 3 - } - for i in xrange(self._num_conv_layers): - feeding_dict["conv%d_index_range" % i] = len(feeding_dict) + feeding_dict = {"audio_spectrogram": 0, "transcript_text": 1} return feeding_dict @property @@ -322,29 +310,7 @@ class DataGenerator(object): padded_audio[:, :audio.shape[1]] = audio if flatten: padded_audio = padded_audio.flatten() - - # Stride size for conv0 is (3, 2) - # Stride size for conv1 to convN is (1, 2) - # Same as the network, hard-coded here - padded_instance = [padded_audio, text] - padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1 - padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1 - valid_w = (audio.shape[1] - 1) // 3 + 1 - padded_instance += [ - [0], # sequence offset, always 0 - [valid_w], # valid sequence length - # Index ranges for channel, height and width - # Please refer scale_sub_region layer to see details - [1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w] - ] - pre_padded_h = padded_conv0_h - for i in xrange(self._num_conv_layers - 1): - padded_h = (pre_padded_h - 1) // 2 + 1 - pre_padded_h = padded_h - padded_instance += [ - [1, 32, 1, padded_h, valid_w + 1, padded_conv0_w] - ] - + padded_instance = [padded_audio, text, audio.shape[1]] new_batch.append(padded_instance) return new_batch diff --git a/deploy/demo_server.py b/deploy/demo_server.py index bb339b761..88703e5f6 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -147,8 +147,7 @@ def start_server(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1, - keep_transcription_text=True, - num_conv_layers=args.num_conv_layers) + keep_transcription_text=True) # prepare ASR model ds2_model = DeepSpeech2Model( vocab_size=data_generator.vocab_size, @@ -164,20 +163,9 @@ def start_server(): # prepare ASR inference handler def file_to_transcript(filename): feature = data_generator.process_utterance(filename, "") - ins = [] - conv0_h = (feature[0].shape[0] - 1) // 2 + 1 - conv0_w = (feature[0].shape[1] - 1) // 3 + 1 - ins += [feature[0], feature[1], - [0], [conv0_w], - [1, 32, 1, conv0_h, conv0_w + 1, conv0_w]] - pre_h = conv0_h - for i in xrange(args.num_conv_layers - 1): - h = (pre_h - 1) // 2 + 1 - pre_h = h - ins += [[1, 32, 1, h, conv0_w + 1, conv0_w]] result_transcript = ds2_model.infer_batch( - infer_data=[ins], + infer_data=[feature], decoding_method=args.decoding_method, beam_alpha=args.alpha, beam_beta=args.beta, diff --git a/model_utils/model.py b/model_utils/model.py index 26aa1470a..beb01fa94 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -8,6 +8,8 @@ import os import time import logging import gzip +import copy +import inspect from distutils.dir_util import mkpath import paddle.v2 as paddle from decoders.swig_wrapper import Scorer @@ -48,6 +50,7 @@ class DeepSpeech2Model(object): self._inferer = None self._loss_inferer = None self._ext_scorer = None + self._num_conv_layers = num_rnn_layers self.logger = logging.getLogger("") self.logger.setLevel(level=logging.INFO) @@ -91,6 +94,11 @@ class DeepSpeech2Model(object): if not os.path.exists(output_model_dir): mkpath(output_model_dir) + # adapt the feeding dict and reader according to the network + adapted_feeding_dict = self._adapt_feeding_dict(feeding_dict) + adapted_train_batch_reader = self._adapt_data(train_batch_reader) + adapted_dev_batch_reader = self._adapt_data(dev_batch_reader) + # prepare optimizer and trainer optimizer = 
paddle.optimizer.Adam( learning_rate=learning_rate, @@ -128,7 +136,8 @@ class DeepSpeech2Model(object): (time.time() - start_time, event.pass_id)) else: result = trainer.test( - reader=dev_batch_reader, feeding=feeding_dict) + reader=adapted_dev_batch_reader, + feeding=adapted_feeding_dict) print( "\n------- Time: %d sec, Pass: %d, " "ValidationCost: %s" % @@ -140,11 +149,12 @@ class DeepSpeech2Model(object): # run train trainer.train( - reader=train_batch_reader, + reader=adapted_train_batch_reader, event_handler=event_handler, num_passes=num_passes, - feeding=feeding_dict) + feeding=adapted_feeding_dict) + # TODO(@pkuyym) merge this function into infer_batch def infer_loss_batch(self, infer_data): """Model inference. Infer the ctc loss for a batch of speech utterances. @@ -205,15 +215,17 @@ class DeepSpeech2Model(object): if self._inferer == None: self._inferer = paddle.inference.Inference( output_layer=self._log_probs, parameters=self._parameters) + adapted_feeding_dict = self._adapt_feeding_dict(feeding_dict) + adapted_infer_data = self._adapt_data(infer_data) # run inference infer_results = self._inferer.infer( - input=infer_data, feeding=feeding_dict) - start_pos = [0] * (len(infer_data) + 1) - for i in xrange(len(infer_data)): - start_pos[i + 1] = start_pos[i] + infer_data[i][3][0] + input=adapted_infer_data, feeding=adapted_feeding_dict) + start_pos = [0] * (len(adapted_infer_data) + 1) + for i in xrange(len(adapted_infer_data)): + start_pos[i + 1] = start_pos[i] + adapted_infer_data[i][3][0] probs_split = [ infer_results[start_pos[i]:start_pos[i + 1]] - for i in xrange(0, len(infer_data)) + for i in xrange(0, len(adapted_infer_data)) ] # run decoder results = [] @@ -260,6 +272,92 @@ class DeepSpeech2Model(object): decoding_method) return results + def _adapt_feeding_dict(self, feeding_dict): + """Adapt feeding dict according to network struct. + + To remove impacts from padding part, we add scale_sub_region layer and + sub_seq layer. For sub_seq layer, 'sequence_offset' and + 'sequence_length' fields are appended. For each scale_sub_region layer + 'convN_index_range' field is appended. + + :param feeding_dict: Feeding is a map of field name and tuple index + of the data that reader returns. + :type feeding_dict: dict|list + :return: Adapted feeding dict. + :rtype: dict|list + """ + adapted_feeding_dict = copy.deepcopy(feeding_dict) + if isinstance(feeding_dict, dict): + adapted_feeding_dict["sequence_offset"] = len(adapted_feeding_dict) + adapted_feeding_dict["sequence_length"] = len(adapted_feeding_dict) + for i in xrange(self._num_conv_layers): + adapted_feeding_dict["conv%d_index_range" %i] = \ + len(adapted_feeding_dict) + elif isinstance(feeding_dict, list): + adapted_feeding_dict.append("sequence_offset") + adapted_feeding_dict.append("sequence_length") + for i in xrange(self._num_conv_layers): + adapted_feeding_dict.append("conv%d_index_range" % i) + else: + raise ValueError("Type of feeding_dict is %s, not supported." % + type(feeding_dict)) + + return adapted_feeding_dict + + def _adapt_data(self, data): + """Adapt data according to network struct. + + For each convolution layer in the conv_group, to remove impacts from + padding data, we can multiply zero to the padding part of the outputs + of each batch normalization layer. We add a scale_sub_region layer after + each batch normalization layer to reset the padding data. + For rnn layers, to remove impacts from padding data, we can truncate the + padding part before output data feeded into the first rnn layer. 
We use + sub_seq layer to achieve this. + + :param data: Data from data_provider. + :type data: list|function + :return: Adapted data. + :rtype: list|function + """ + + def adapt_instance(instance): + padded_audio, text, audio_len = instance + adapted_instance = [padded_audio, text] + # Stride size for conv0 is (3, 2) + # Stride size for conv1 to convN is (1, 2) + # Same as the network, hard-coded here + padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1 + padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1 + valid_w = (audio_len - 1) // 3 + 1 + adapted_instance += [ + [0], # sequence offset, always 0 + [valid_w], # valid sequence length + # Index ranges for channel, height and width + # Please refer scale_sub_region layer to see details + [1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w] + ] + pre_padded_h = padded_conv0_h + for i in xrange(self._num_conv_layers - 1): + padded_h = (pre_padded_h - 1) // 2 + 1 + pre_padded_h = padded_h + adapted_instance += [ + [1, 32, 1, padded_h, valid_w + 1, padded_conv0_w] + ] + return adapted_instance + + if isinstance(data, list): + return map(adapt_instance, data) + elif inspect.isgeneratorfunction(data): + + def adapted_reader(): + for instance in data(): + yield map(adapt_instance, instance) + + return adapted_reader + else: + raise ValueError("Type of data is %s, not supported." % type(data)) + def _create_parameters(self, model_path=None): """Load or create model parameters.""" if model_path is None: diff --git a/test.py b/test.py index 224cea9b6..85b49f2ae 100644 --- a/test.py +++ b/test.py @@ -70,8 +70,7 @@ def evaluate(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_proc_data, - keep_transcription_text=True, - num_conv_layers=args.num_conv_layers) + keep_transcription_text=True) batch_reader = data_generator.batch_reader_creator( manifest_path=args.test_manifest, batch_size=args.batch_size, diff --git a/train.py b/train.py index 562fb4622..16415713f 100644 --- a/train.py +++ b/train.py @@ -75,15 +75,13 @@ def train(): max_duration=args.max_duration, min_duration=args.min_duration, specgram_type=args.specgram_type, - num_threads=args.num_proc_data, - num_conv_layers=args.num_conv_layers) + num_threads=args.num_proc_data) dev_generator = DataGenerator( vocab_filepath=args.vocab_path, mean_std_filepath=args.mean_std_path, augmentation_config="{}", specgram_type=args.specgram_type, - num_threads=args.num_proc_data, - num_conv_layers=args.num_conv_layers) + num_threads=args.num_proc_data) train_batch_reader = train_generator.batch_reader_creator( manifest_path=args.train_manifest, batch_size=args.batch_size, From c94fbdb41982e8727613963dfb8ec25f2fc1b0d0 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 10 Dec 2017 15:44:17 +0800 Subject: [PATCH 307/335] Consider instance without padding data. 
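A tiny runnable sketch (illustration only) of the instance-size handling this patch introduces: when an instance carries no explicit valid-length field, the full time axis of the audio is treated as valid. The helper name is hypothetical.

```python
import numpy as np

# Hypothetical helper mirroring the adapt_instance change in the diff below.
def unpack_instance(instance):
    if len(instance) < 2 or len(instance) > 3:
        raise ValueError("Size of instance should be 2 or 3.")
    audio, text = instance[0], instance[1]
    # No padding info: every time step of the audio is valid.
    audio_len = instance[2] if len(instance) == 3 else audio.shape[1]
    return audio, text, audio_len

audio = np.zeros((161, 200))
print(unpack_instance([audio, [5, 7, 9]])[2])       # 200 (unpadded)
print(unpack_instance([audio, [5, 7, 9], 150])[2])  # 150 (padded to 200, 150 valid)
```
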
--- model_utils/model.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/model_utils/model.py b/model_utils/model.py index beb01fa94..5f51c6454 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -322,7 +322,15 @@ class DeepSpeech2Model(object): """ def adapt_instance(instance): - padded_audio, text, audio_len = instance + if len(instance) < 2 or len(instance) > 3: + raise ValueError("Size of instance should be 2 or 3.") + padded_audio = instance[0] + text = instance[1] + # no padding part + if len(instance) == 2: + audio_len = padded_audio.shape[1] + else: + audio_len = instance[2] adapted_instance = [padded_audio, text] # Stride size for conv0 is (3, 2) # Stride size for conv1 to convN is (1, 2) From d7291da01f763adc26cf03a8d2ba9e96b8d4e662 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 10 Dec 2017 15:55:46 +0800 Subject: [PATCH 308/335] Bug fix. --- model_utils/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model_utils/model.py b/model_utils/model.py index 5f51c6454..85d50053e 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -50,7 +50,7 @@ class DeepSpeech2Model(object): self._inferer = None self._loss_inferer = None self._ext_scorer = None - self._num_conv_layers = num_rnn_layers + self._num_conv_layers = num_conv_layers self.logger = logging.getLogger("") self.logger.setLevel(level=logging.INFO) From 1f1dee2e88158d43dbeb7d09c639d476ba43cde1 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Mon, 11 Dec 2017 17:27:47 +0800 Subject: [PATCH 309/335] Adapt infer.py. --- infer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/infer.py b/infer.py index 32d15f126..7e30549ae 100644 --- a/infer.py +++ b/infer.py @@ -69,8 +69,7 @@ def infer(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=1, - keep_transcription_text=True, - num_conv_layers=args.num_conv_layers) + keep_transcription_text=True) batch_reader = data_generator.batch_reader_creator( manifest_path=args.infer_manifest, batch_size=args.num_samples, From 6c2d0e61b5dc4f3b4eb7e32660c29a39e74de961 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 13 Dec 2017 10:23:04 +0800 Subject: [PATCH 310/335] fix the link to cloud training in doc --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 41307b045..4462ecb8b 100644 --- a/README.md +++ b/README.md @@ -395,13 +395,13 @@ Please take the following steps to submit a training job: It should be done only once for the very first time to do the cloud training. Later, the data is kept persisitent on the cloud filesystem and reusable for further job submissions. - For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). + For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/cloud). - Configure training arguments: Configure the cloud job parameters in `pcloud_submit.sh` (e.g. `NUM_NODES`, `NUM_GPUS`, `CLOUD_TRAIN_DIR`, `JOB_NAME` etc.) and then configure other hyper-parameters for training in `pcloud_train.sh` (just as what you do for local training). - For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). + For argument details please refer to [Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/cloud). 
- Submit the job: @@ -428,7 +428,7 @@ Please take the following steps to submit a training job: For more information about the usage of PaddleCloud, please refer to [PaddleCloud Usage](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务). For more information about the DeepSpeech2 training on PaddleCloud, please refer to -[Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/models/tree/develop/deep_speech_2/cloud). +[Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/cloud). ## Training for Mandarin Language From 14dc5d43cbf49bec9bb25257e4846bf0bef1b121 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 20 Dec 2017 21:00:14 +0800 Subject: [PATCH 311/335] Update Baidu8k model. --- models/baidu_en8k/download_model.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/baidu_en8k/download_model.sh b/models/baidu_en8k/download_model.sh index e6b26a3e8..796b6cb9d 100644 --- a/models/baidu_en8k/download_model.sh +++ b/models/baidu_en8k/download_model.sh @@ -3,7 +3,7 @@ . ../../utils/utility.sh URL='http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90' -MD5=a19d40cb3b558eb696c44d883f32cfda +MD5=5fe7639e720d51b3c3bdf7a1470c6272 TARGET=./baidu_en8k_model.tar.gz From 0823cd2ce468b9987a2b2dd73e022b323910bd54 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 20 Dec 2017 21:35:24 +0800 Subject: [PATCH 312/335] Upload BaiduCN1.2k model. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4462ecb8b..7faa2da71 100644 --- a/README.md +++ b/README.md @@ -489,7 +489,7 @@ Language | Model Name | Training Data | Hours of Speech English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | [BaiduCN1.2k Model](to-be-added) | Baidu Internal Mandarin Dataset | 1204 h +Mandarin | [BaiduCN1.2k Model](http://cloud.dlnel.org/filepub/?uuid=499569a6-0025-4f40-83e6-1c99527431a6) | Baidu Internal Mandarin Dataset | 1204 h #### Language Model Released @@ -517,9 +517,9 @@ For reproducing benchmark results on VoxForge data, we provide a script to downl #### Benchmark Results for Mandarin Model (Character Error Rate) -Test Set | Aishell Model | BaiduCN1.2k Model -:--------------------- | ---------------: | -------------------: -Baidu Internal Testset | - | 15.49 +Test Set | BaiduCN1.2k Model +:--------------------- | -------------------: +Baidu Internal Testset | 12.64 #### Acceleration with Multi-GPUs From 9c5daab08d4dba1d0a30ef45a33ae3ab30aa7722 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 21 Dec 2017 10:17:15 +0800 Subject: [PATCH 313/335] Update benchmark result for BaiduEN8K model due to #88 --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7faa2da71..9480eccb9 100644 --- a/README.md +++ b/README.md @@ -505,13 +505,13 @@ Language Model | Training Data | Token-based | Size | Descriptions Test Set | LibriSpeech Model | BaiduEN8K Model :--------------------- | ---------------: | -------------------: -LibriSpeech Test-Clean | 6.85 | 5.73 -LibriSpeech 
Test-Other | 21.18 | 14.47 -VoxForge American-Canadian | 12.12 |   7.37 -VoxForge Commonwealth | 19.82 | 15.58 -VoxForge European | 30.15 | 19.44 -VoxForge Indian | 53.73 | 26.15 -Baidu Internal Testset  |   40.75 |   8.82 +LibriSpeech Test-Clean | 6.85 | 5.41 +LibriSpeech Test-Other | 21.18 | 13.85 +VoxForge American-Canadian | 12.12 |   7.13 +VoxForge Commonwealth | 19.82 | 14.93 +VoxForge European | 30.15 | 18.64 +VoxForge Indian | 53.73 | 25.51 +Baidu Internal Testset  |   40.75 |   8.48 For reproducing benchmark results on VoxForge data, we provide a script to download data and generate VoxForge dialect manifest files. Please go to ```data/voxforge``` and execute ```sh run_data.sh``` to get VoxForge dialect manifest files. Notice that VoxForge data may keep updating and the generated manifest files may have difference from those we evaluated on. From 3ecf1ad4f5f2d61d31184c84be7ccfffc385611f Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 12 Jan 2018 21:33:00 +0800 Subject: [PATCH 314/335] Decouple ext scorer init & inference & decoding for the convenience of tuning --- examples/librispeech/run_tune.sh | 2 +- infer.py | 32 ++++--- model_utils/model.py | 158 +++++++++++++++++-------------- test.py | 31 ++++-- tools/tune.py | 104 +++++--------------- 5 files changed, 153 insertions(+), 174 deletions(-) diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index c3695d1cb..9fc9cbb9d 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -7,7 +7,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 \ python -u tools/tune.py \ --num_batches=-1 \ --batch_size=128 \ ---trainer_count=8 \ +--trainer_count=4 \ --beam_size=500 \ --num_proc_bsearch=12 \ --num_conv_layers=2 \ diff --git a/infer.py b/infer.py index b801c507b..1539fbaaf 100644 --- a/infer.py +++ b/infer.py @@ -90,18 +90,26 @@ def infer(): # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] - result_transcripts = ds2_model.infer_batch( - infer_data=infer_data, - decoding_method=args.decoding_method, - beam_alpha=args.alpha, - beam_beta=args.beta, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - vocab_list=vocab_list, - language_model_path=args.lang_model_path, - num_processes=args.num_proc_bsearch, - feeding_dict=data_generator.feeding) + probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, + feeding_dict=data_generator.feeding) + if args.decoding_method == "ctc_greedy": + ds2_model.logger.info("start inference ...") + result_transcripts = ds2_model.infer_batch_greedy( + probs_split=probs_split, + vocab_list=vocab_list) + else: + ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, + vocab_list) + ds2_model.logger.info("start inference ...") + result_transcripts = ds2_model.infer_batch_beam_search( + probs_split=probs_split, + beam_alpha=args.alpha, + beam_beta=args.beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, + num_processes=args.num_proc_bsearch) error_rate_func = cer if args.error_rate_type == 'cer' else wer target_transcripts = [data[1] for data in infer_data] diff --git a/model_utils/model.py b/model_utils/model.py index 85d50053e..f6d3ef059 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -173,43 +173,19 @@ class DeepSpeech2Model(object): # run inference return self._loss_inferer.infer(input=infer_data) - def infer_batch(self, infer_data, 
decoding_method, beam_alpha, beam_beta, - beam_size, cutoff_prob, cutoff_top_n, vocab_list, - language_model_path, num_processes, feeding_dict): - """Model inference. Infer the transcription for a batch of speech - utterances. + def infer_probs_batch(self, infer_data, feeding_dict): + """Infer the prob matrices for a batch of speech utterances. :param infer_data: List of utterances to infer, with each utterance consisting of a tuple of audio features and transcription text (empty string). :type infer_data: list - :param decoding_method: Decoding method name, 'ctc_greedy' or - 'ctc_beam_search'. - :param decoding_method: string - :param beam_alpha: Parameter associated with language model. - :type beam_alpha: float - :param beam_beta: Parameter associated with word count. - :type beam_beta: float - :param beam_size: Width for Beam search. - :type beam_size: int - :param cutoff_prob: Cutoff probability in pruning, - default 1.0, no pruning. - :type cutoff_prob: float - :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n - characters with highest probs in vocabulary will be - used in beam search, default 40. - :type cutoff_top_n: int - :param vocab_list: List of tokens in the vocabulary, for decoding. - :type vocab_list: list - :param language_model_path: Filepath for language model. - :type language_model_path: basestring|None - :param num_processes: Number of processes (CPU) for decoder. - :type num_processes: int :param feeding_dict: Feeding is a map of field name and tuple index of the data that reader returns. :type feeding_dict: dict|list - :return: List of transcription texts. - :rtype: List of basestring + :return: List of 2-D probability matrix, and each consists of prob + vectors for one speech utterancce. + :rtype: List of matrix """ # define inferer if self._inferer == None: @@ -227,49 +203,91 @@ class DeepSpeech2Model(object): infer_results[start_pos[i]:start_pos[i + 1]] for i in xrange(0, len(adapted_infer_data)) ] - # run decoder + return probs_split + + def infer_batch_greedy(self, probs_split, vocab_list): + """ + :param probs_split: List of 2-D probability matrix, and each consists + of prob vectors for one speech utterancce. + :param probs_split: List of matrix + :param vocab_list: List of tokens in the vocabulary, for decoding. + :type vocab_list: list + :return: List of transcription texts. + :rtype: List of basestring + """ results = [] - if decoding_method == "ctc_greedy": - # best path decode - for i, probs in enumerate(probs_split): - output_transcription = ctc_greedy_decoder( - probs_seq=probs, vocabulary=vocab_list) - results.append(output_transcription) - elif decoding_method == "ctc_beam_search": - # initialize external scorer - if self._ext_scorer == None: - self._loaded_lm_path = language_model_path - self.logger.info("begin to initialize the external scorer " - "for decoding") - self._ext_scorer = Scorer(beam_alpha, beam_beta, - language_model_path, vocab_list) - - lm_char_based = self._ext_scorer.is_character_based() - lm_max_order = self._ext_scorer.get_max_order() - lm_dict_size = self._ext_scorer.get_dict_size() - self.logger.info("language model: " - "is_character_based = %d," % lm_char_based + - " max_order = %d," % lm_max_order + - " dict_size = %d" % lm_dict_size) - self.logger.info("end initializing scorer. 
Start decoding ...") - else: - self._ext_scorer.reset_params(beam_alpha, beam_beta) - assert self._loaded_lm_path == language_model_path - # beam search decode - num_processes = min(num_processes, len(probs_split)) - beam_search_results = ctc_beam_search_decoder_batch( - probs_split=probs_split, - vocabulary=vocab_list, - beam_size=beam_size, - num_processes=num_processes, - ext_scoring_func=self._ext_scorer, - cutoff_prob=cutoff_prob, - cutoff_top_n=cutoff_top_n) - - results = [result[0][1] for result in beam_search_results] + for i, probs in enumerate(probs_split): + output_transcription = ctc_greedy_decoder( + probs_seq=probs, vocabulary=vocab_list) + results.append(output_transcription) + return results + + def init_ext_scorer(self, beam_alpha, beam_beta, language_model_path, + vocab_list): + """Initialize the external scorer. + + """ + if language_model_path != '': + self.logger.info("begin to initialize the external scorer " + "for decoding") + self._ext_scorer = Scorer(beam_alpha, beam_beta, + language_model_path, vocab_list) + lm_char_based = self._ext_scorer.is_character_based() + lm_max_order = self._ext_scorer.get_max_order() + lm_dict_size = self._ext_scorer.get_dict_size() + self.logger.info("language model: " + "is_character_based = %d," % lm_char_based + + " max_order = %d," % lm_max_order + + " dict_size = %d" % lm_dict_size) + self.logger.info("end initializing scorer") else: - raise ValueError("Decoding method [%s] is not supported." % - decoding_method) + self._ext_scorer = None + self.logger.info("no language model provided, " + "decoding by pure beam search without scorer.") + + def infer_batch_beam_search(self, probs_split, beam_alpha, beam_beta, + beam_size, cutoff_prob, cutoff_top_n, + vocab_list, num_processes): + """Model inference. Infer the transcription for a batch of speech + utterances. + + :param probs_split: List of 2-D probability matrix, and each consists + of prob vectors for one speech utterancce. + :param probs_split: List of matrix + :param beam_alpha: Parameter associated with language model. + :type beam_alpha: float + :param beam_beta: Parameter associated with word count. + :type beam_beta: float + :param beam_size: Width for Beam search. + :type beam_size: int + :param cutoff_prob: Cutoff probability in pruning, + default 1.0, no pruning. + :type cutoff_prob: float + :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n + characters with highest probs in vocabulary will be + used in beam search, default 40. + :type cutoff_top_n: int + :param vocab_list: List of tokens in the vocabulary, for decoding. + :type vocab_list: list + :param num_processes: Number of processes (CPU) for decoder. + :type num_processes: int + :return: List of transcription texts. 
+ :rtype: List of basestring + """ + if self._ext_scorer != None: + self._ext_scorer.reset_params(beam_alpha, beam_beta) + # beam search decode + num_processes = min(num_processes, len(probs_split)) + beam_search_results = ctc_beam_search_decoder_batch( + probs_split=probs_split, + vocabulary=vocab_list, + beam_size=beam_size, + num_processes=num_processes, + ext_scoring_func=self._ext_scorer, + cutoff_prob=cutoff_prob, + cutoff_top_n=cutoff_top_n) + + results = [result[0][1] for result in beam_search_results] return results def _adapt_feeding_dict(self, feeding_dict): diff --git a/test.py b/test.py index 5cf766487..24ce54a2b 100644 --- a/test.py +++ b/test.py @@ -90,22 +90,33 @@ def evaluate(): # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + if args.decoding_method == "ctc_beam_search": + ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, + vocab_list) errors_func = char_errors if args.error_rate_type == 'cer' else word_errors errors_sum, len_refs, num_ins = 0.0, 0, 0 + ds2_model.logger.info("start evaluation ...") for infer_data in batch_reader(): - result_transcripts = ds2_model.infer_batch( + probs_split = ds2_model.infer_probs_batch( infer_data=infer_data, - decoding_method=args.decoding_method, - beam_alpha=args.alpha, - beam_beta=args.beta, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - vocab_list=vocab_list, - language_model_path=args.lang_model_path, - num_processes=args.num_proc_bsearch, feeding_dict=data_generator.feeding) + + if args.decoding_method == "ctc_greedy": + result_transcripts = ds2_model.infer_batch_greedy( + probs_split=probs_split, + vocab_list=vocab_list) + else: + result_transcripts = ds2_model.infer_batch_beam_search( + probs_split=probs_split, + beam_alpha=args.alpha, + beam_beta=args.beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, + num_processes=args.num_proc_bsearch) target_transcripts = [data[1] for data in infer_data] + for target, result in zip(target_transcripts, result_transcripts): errors, len_ref = errors_func(target, result) errors_sum += errors diff --git a/tools/tune.py b/tools/tune.py index b13233195..83978be8d 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -13,9 +13,7 @@ import logging import paddle.v2 as paddle import _init_paths from data_utils.data import DataGenerator -from decoders.swig_wrapper import Scorer -from decoders.swig_wrapper import ctc_beam_search_decoder_batch -from model_utils.model import deep_speech_v2_network +from model_utils.model import DeepSpeech2Model from utils.error_rate import char_errors, word_errors from utils.utility import add_arguments, print_arguments @@ -88,40 +86,7 @@ def tune(): augmentation_config='{}', specgram_type=args.specgram_type, num_threads=args.num_proc_data, - keep_transcription_text=True, - num_conv_layers=args.num_conv_layers) - - audio_data = paddle.layer.data( - name="audio_spectrogram", - type=paddle.data_type.dense_array(161 * 161)) - text_data = paddle.layer.data( - name="transcript_text", - type=paddle.data_type.integer_value_sequence(data_generator.vocab_size)) - seq_offset_data = paddle.layer.data( - name='sequence_offset', - type=paddle.data_type.integer_value_sequence(1)) - seq_len_data = paddle.layer.data( - name='sequence_length', - type=paddle.data_type.integer_value_sequence(1)) - index_range_datas = [] - for i in xrange(args.num_rnn_layers): - 
index_range_datas.append( - paddle.layer.data( - name='conv%d_index_range' % i, - type=paddle.data_type.dense_vector(6))) - - output_probs, _ = deep_speech_v2_network( - audio_data=audio_data, - text_data=text_data, - seq_offset_data=seq_offset_data, - seq_len_data=seq_len_data, - index_range_datas=index_range_datas, - dict_size=data_generator.vocab_size, - num_conv_layers=args.num_conv_layers, - num_rnn_layers=args.num_rnn_layers, - rnn_size=args.rnn_layer_size, - use_gru=args.use_gru, - share_rnn_weights=args.share_rnn_weights) + keep_transcription_text=True) batch_reader = data_generator.batch_reader_creator( manifest_path=args.tune_manifest, @@ -129,35 +94,17 @@ def tune(): sortagrad=False, shuffle_method=None) - # load parameters - if not os.path.isfile(args.model_path): - raise IOError("Invaid model path: %s" % args.model_path) - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(args.model_path)) + ds2_model = DeepSpeech2Model( + vocab_size=data_generator.vocab_size, + num_conv_layers=args.num_conv_layers, + num_rnn_layers=args.num_rnn_layers, + rnn_layer_size=args.rnn_layer_size, + use_gru=args.use_gru, + pretrained_model_path=args.model_path, + share_rnn_weights=args.share_rnn_weights) - inferer = paddle.inference.Inference( - output_layer=output_probs, parameters=parameters) # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] - - # init logger - logger = logging.getLogger("") - logger.setLevel(level=logging.INFO) - # init external scorer - logger.info("begin to initialize the external scorer for tuning") - if not os.path.isfile(args.lang_model_path): - raise IOError("Invaid language model path: %s" % args.lang_model_path) - ext_scorer = Scorer( - alpha=args.alpha_from, - beta=args.beta_from, - model_path=args.lang_model_path, - vocabulary=vocab_list) - logger.info("language model: " - "is_character_based = %d," % ext_scorer.is_character_based() + - " max_order = %d," % ext_scorer.get_max_order() + - " dict_size = %d" % ext_scorer.get_dict_size()) - logger.info("end initializing scorer. 
Start tuning ...") - errors_func = char_errors if args.error_rate_type == 'cer' else word_errors # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) @@ -168,37 +115,32 @@ def tune(): err_sum = [0.0 for i in xrange(len(params_grid))] err_ave = [0.0 for i in xrange(len(params_grid))] num_ins, len_refs, cur_batch = 0, 0, 0 + # initialize external scorer + ds2_model.init_ext_scorer(args.alpha_from, args.beta_from, + args.lang_model_path, vocab_list) ## incremental tuning parameters over multiple batches + ds2_model.logger.info("start tuning ...") for infer_data in batch_reader(): if (args.num_batches >= 0) and (cur_batch >= args.num_batches): break - infer_results = inferer.infer(input=infer_data, - feeding=data_generator.feeding) - start_pos = [0] * (len(infer_data) + 1) - for i in xrange(len(infer_data)): - start_pos[i + 1] = start_pos[i] + infer_data[i][3][0] - probs_split = [ - infer_results[start_pos[i]:start_pos[i + 1]] - for i in xrange(0, len(infer_data)) - ] - + probs_split = ds2_model.infer_probs_batch( + infer_data=infer_data, + feeding_dict=data_generator.feeding) target_transcripts = [ data[1] for data in infer_data ] num_ins += len(target_transcripts) # grid search for index, (alpha, beta) in enumerate(params_grid): - # reset alpha & beta - ext_scorer.reset_params(alpha, beta) - beam_search_results = ctc_beam_search_decoder_batch( + result_transcripts = ds2_model.infer_batch_beam_search( probs_split=probs_split, - vocabulary=vocab_list, + beam_alpha=alpha, + beam_beta=beta, beam_size=args.beam_size, - num_processes=args.num_proc_bsearch, cutoff_prob=args.cutoff_prob, cutoff_top_n=args.cutoff_top_n, - ext_scoring_func=ext_scorer, ) + vocab_list=vocab_list, + num_processes=args.num_proc_bsearch) - result_transcripts = [res[0][1] for res in beam_search_results] for target, result in zip(target_transcripts, result_transcripts): errors, len_ref = errors_func(target, result) err_sum[index] += errors @@ -235,7 +177,7 @@ def tune(): % (cur_batch, "%.3f" % params_grid[min_index][0], "%.3f" % params_grid[min_index][1])) - logger.info("finish tuning") + ds2_model.logger.info("finish tuning") def main(): From 8ae25aebe17d4bb672092e74ae6c31cdae692775 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 12 Jan 2018 21:53:43 +0800 Subject: [PATCH 315/335] Add more comments in init_ext_scorer() --- model_utils/model.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/model_utils/model.py b/model_utils/model.py index f6d3ef059..70ba7bb93 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -226,6 +226,17 @@ class DeepSpeech2Model(object): vocab_list): """Initialize the external scorer. + :param beam_alpha: Parameter associated with language model. + :type beam_alpha: float + :param beam_beta: Parameter associated with word count. + :type beam_beta: float + :param language_model_path: Filepath for language model. If it is + empty, the external scorer will be set to + None, and the decoding method will be pure + beam search without scorer. + :type language_model_path: basestring|None + :param vocab_list: List of tokens in the vocabulary, for decoding. 
+ :type vocab_list: list """ if language_model_path != '': self.logger.info("begin to initialize the external scorer " From 10d337097005ac56f57f429266543d892c36a64d Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 12 Jan 2018 22:50:19 +0800 Subject: [PATCH 316/335] Remove redundant lines in tune.py --- tools/tune.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/tune.py b/tools/tune.py index 83978be8d..923e6c3c3 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -70,9 +70,6 @@ add_arg('specgram_type', str, args = parser.parse_args() -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s') - def tune(): """Tune parameters alpha and beta incrementally.""" if not args.num_alphas >= 0: From 3a36c8a69ea50200439794f7cb87a97267044887 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 12 Jan 2018 23:22:08 +0800 Subject: [PATCH 317/335] Adapt demo_server to the decoupling in infer_batch() --- deploy/demo_server.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index d64f9f015..53be16f77 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -160,22 +160,30 @@ def start_server(): vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + if args.decoding_method == "ctc_beam_search": + ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, + vocab_list) # prepare ASR inference handler def file_to_transcript(filename): feature = data_generator.process_utterance(filename, "") - - result_transcript = ds2_model.infer_batch( + probs_split = ds2_model.infer_probs_batch( infer_data=[feature], - decoding_method=args.decoding_method, - beam_alpha=args.alpha, - beam_beta=args.beta, - beam_size=args.beam_size, - cutoff_prob=args.cutoff_prob, - cutoff_top_n=args.cutoff_top_n, - vocab_list=vocab_list, - language_model_path=args.lang_model_path, - num_processes=1, feeding_dict=data_generator.feeding) + + if args.decoding_method == "ctc_greedy": + result_transcript = ds2_model.infer_batch_greedy( + probs_split=probs_split, + vocab_list=vocab_list) + else: + result_transcript = ds2_model.infer_batch_beam_search( + probs_split=probs_split, + beam_alpha=args.alpha, + beam_beta=args.beta, + beam_size=args.beam_size, + cutoff_prob=args.cutoff_prob, + cutoff_top_n=args.cutoff_top_n, + vocab_list=vocab_list, + num_processes=1) return result_transcript[0] # warming up with utterrances sampled from Librispeech From 66a39088180052813c80a33919b85ff976b6e076 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 12 Jan 2018 23:33:03 +0800 Subject: [PATCH 318/335] Adjust the order of scorer init & probs infer in infer.py --- infer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/infer.py b/infer.py index 1539fbaaf..5dd9b406d 100644 --- a/infer.py +++ b/infer.py @@ -90,17 +90,18 @@ def infer(): # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + if args.decoding_method == "ctc_beam_search": + ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, + vocab_list) + + ds2_model.logger.info("start inference ...") probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, feeding_dict=data_generator.feeding) if args.decoding_method == "ctc_greedy": - ds2_model.logger.info("start inference ...") result_transcripts = ds2_model.infer_batch_greedy( probs_split=probs_split, vocab_list=vocab_list) else: - 
ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, - vocab_list) - ds2_model.logger.info("start inference ...") result_transcripts = ds2_model.infer_batch_beam_search( probs_split=probs_split, beam_alpha=args.alpha, From 6c2cf40ce1abbd60a775ee0272bab48836ff9848 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sat, 13 Jan 2018 11:27:40 +0800 Subject: [PATCH 319/335] Rename prefix 'infer_batch' to 'decode_batch' --- deploy/demo_server.py | 4 ++-- infer.py | 4 ++-- model_utils/model.py | 14 +++++++------- test.py | 4 ++-- tools/tune.py | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index 53be16f77..eca13dcea 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -171,11 +171,11 @@ def start_server(): feeding_dict=data_generator.feeding) if args.decoding_method == "ctc_greedy": - result_transcript = ds2_model.infer_batch_greedy( + result_transcript = ds2_model.decode_batch_greedy( probs_split=probs_split, vocab_list=vocab_list) else: - result_transcript = ds2_model.infer_batch_beam_search( + result_transcript = ds2_model.decode_batch_beam_search( probs_split=probs_split, beam_alpha=args.alpha, beam_beta=args.beta, diff --git a/infer.py b/infer.py index 5dd9b406d..ff45a5dc8 100644 --- a/infer.py +++ b/infer.py @@ -98,11 +98,11 @@ def infer(): probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, feeding_dict=data_generator.feeding) if args.decoding_method == "ctc_greedy": - result_transcripts = ds2_model.infer_batch_greedy( + result_transcripts = ds2_model.decode_batch_greedy( probs_split=probs_split, vocab_list=vocab_list) else: - result_transcripts = ds2_model.infer_batch_beam_search( + result_transcripts = ds2_model.decode_batch_beam_search( probs_split=probs_split, beam_alpha=args.alpha, beam_beta=args.beta, diff --git a/model_utils/model.py b/model_utils/model.py index 70ba7bb93..a8283fae4 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -205,8 +205,9 @@ class DeepSpeech2Model(object): ] return probs_split - def infer_batch_greedy(self, probs_split, vocab_list): - """ + def decode_batch_greedy(self, probs_split, vocab_list): + """Decode by best path for a batch of probs matrix input. + :param probs_split: List of 2-D probability matrix, and each consists of prob vectors for one speech utterancce. :param probs_split: List of matrix @@ -256,11 +257,10 @@ class DeepSpeech2Model(object): self.logger.info("no language model provided, " "decoding by pure beam search without scorer.") - def infer_batch_beam_search(self, probs_split, beam_alpha, beam_beta, - beam_size, cutoff_prob, cutoff_top_n, - vocab_list, num_processes): - """Model inference. Infer the transcription for a batch of speech - utterances. + def decode_batch_beam_search(self, probs_split, beam_alpha, beam_beta, + beam_size, cutoff_prob, cutoff_top_n, + vocab_list, num_processes): + """Decode by beam search for a batch of probs matrix input. :param probs_split: List of 2-D probability matrix, and each consists of prob vectors for one speech utterancce. 
diff --git a/test.py b/test.py index 24ce54a2b..a82893c03 100644 --- a/test.py +++ b/test.py @@ -102,11 +102,11 @@ def evaluate(): feeding_dict=data_generator.feeding) if args.decoding_method == "ctc_greedy": - result_transcripts = ds2_model.infer_batch_greedy( + result_transcripts = ds2_model.decode_batch_greedy( probs_split=probs_split, vocab_list=vocab_list) else: - result_transcripts = ds2_model.infer_batch_beam_search( + result_transcripts = ds2_model.decode_batch_beam_search( probs_split=probs_split, beam_alpha=args.alpha, beam_beta=args.beta, diff --git a/tools/tune.py b/tools/tune.py index 923e6c3c3..d8e28c58a 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -128,7 +128,7 @@ def tune(): num_ins += len(target_transcripts) # grid search for index, (alpha, beta) in enumerate(params_grid): - result_transcripts = ds2_model.infer_batch_beam_search( + result_transcripts = ds2_model.decode_batch_beam_search( probs_split=probs_split, beam_alpha=alpha, beam_beta=beta, From dd2588c96b4589284d73528a3a8566875edc6cc4 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 15 Jan 2018 14:17:07 +0800 Subject: [PATCH 320/335] Merge two if statements in infer --- infer.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/infer.py b/infer.py index ff45a5dc8..4a5f8cb05 100644 --- a/infer.py +++ b/infer.py @@ -90,18 +90,19 @@ def infer(): # decoders only accept string encoded in utf-8 vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] - if args.decoding_method == "ctc_beam_search": - ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, - vocab_list) - - ds2_model.logger.info("start inference ...") - probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, - feeding_dict=data_generator.feeding) if args.decoding_method == "ctc_greedy": + ds2_model.logger.info("start inference ...") + probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, + feeding_dict=data_generator.feeding) result_transcripts = ds2_model.decode_batch_greedy( probs_split=probs_split, vocab_list=vocab_list) else: + ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, + vocab_list) + ds2_model.logger.info("start inference ...") + probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, + feeding_dict=data_generator.feeding) result_transcripts = ds2_model.decode_batch_beam_search( probs_split=probs_split, beam_alpha=args.alpha, From 7c6fa642cda67554c7731c5e38e955fd7e9b0afc Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 15 Jan 2018 14:34:59 +0800 Subject: [PATCH 321/335] Rename infer_probs_batch to infer_batch_probs --- deploy/demo_server.py | 2 +- infer.py | 4 ++-- model_utils/model.py | 2 +- test.py | 2 +- tools/tune.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/demo_server.py b/deploy/demo_server.py index eca13dcea..1cafb7a58 100644 --- a/deploy/demo_server.py +++ b/deploy/demo_server.py @@ -166,7 +166,7 @@ def start_server(): # prepare ASR inference handler def file_to_transcript(filename): feature = data_generator.process_utterance(filename, "") - probs_split = ds2_model.infer_probs_batch( + probs_split = ds2_model.infer_batch_probs( infer_data=[feature], feeding_dict=data_generator.feeding) diff --git a/infer.py b/infer.py index 4a5f8cb05..f4d75685b 100644 --- a/infer.py +++ b/infer.py @@ -92,7 +92,7 @@ def infer(): if args.decoding_method == "ctc_greedy": ds2_model.logger.info("start inference ...") - probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, + probs_split = 
ds2_model.infer_batch_probs(infer_data=infer_data, feeding_dict=data_generator.feeding) result_transcripts = ds2_model.decode_batch_greedy( probs_split=probs_split, @@ -101,7 +101,7 @@ def infer(): ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, vocab_list) ds2_model.logger.info("start inference ...") - probs_split = ds2_model.infer_probs_batch(infer_data=infer_data, + probs_split = ds2_model.infer_batch_probs(infer_data=infer_data, feeding_dict=data_generator.feeding) result_transcripts = ds2_model.decode_batch_beam_search( probs_split=probs_split, diff --git a/model_utils/model.py b/model_utils/model.py index a8283fae4..4b3764bf2 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -173,7 +173,7 @@ class DeepSpeech2Model(object): # run inference return self._loss_inferer.infer(input=infer_data) - def infer_probs_batch(self, infer_data, feeding_dict): + def infer_batch_probs(self, infer_data, feeding_dict): """Infer the prob matrices for a batch of speech utterances. :param infer_data: List of utterances to infer, with each utterance diff --git a/test.py b/test.py index a82893c03..e5a3346a0 100644 --- a/test.py +++ b/test.py @@ -97,7 +97,7 @@ def evaluate(): errors_sum, len_refs, num_ins = 0.0, 0, 0 ds2_model.logger.info("start evaluation ...") for infer_data in batch_reader(): - probs_split = ds2_model.infer_probs_batch( + probs_split = ds2_model.infer_batch_probs( infer_data=infer_data, feeding_dict=data_generator.feeding) diff --git a/tools/tune.py b/tools/tune.py index d8e28c58a..da785189f 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -120,7 +120,7 @@ def tune(): for infer_data in batch_reader(): if (args.num_batches >= 0) and (cur_batch >= args.num_batches): break - probs_split = ds2_model.infer_probs_batch( + probs_split = ds2_model.infer_batch_probs( infer_data=infer_data, feeding_dict=data_generator.feeding) target_transcripts = [ data[1] for data in infer_data ] From a9ccc34a9880b8ed628560dcfe74366064d137c9 Mon Sep 17 00:00:00 2001 From: loongw Date: Tue, 6 Feb 2018 16:29:58 +0800 Subject: [PATCH 322/335] make process_utterance accept file object. --- data_utils/data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 2a6e99b75..59f3c159f 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -97,22 +97,22 @@ class DataGenerator(object): self._local_data.tar2info = {} self._local_data.tar2object = {} - def process_utterance(self, filename, transcript): + def process_utterance(self, audio_file, transcript): """Load, augment, featurize and normalize for speech data. - :param filename: Audio filepath - :type filename: basestring | file + :param audio_file: Filepath or file object of audio file. + :type audio_file: basestring | file :param transcript: Transcription text. :type transcript: basestring :return: Tuple of audio feature tensor and data of transcription part, where transcription part could be token ids or text. 
:rtype: tuple of (2darray, list) """ - if filename.startswith('tar:'): + if isinstance(audio_file, basestring) and audio_file.startswith('tar:'): speech_segment = SpeechSegment.from_file( - self._subfile_from_tar(filename), transcript) + self._subfile_from_tar(audio_file), transcript) else: - speech_segment = SpeechSegment.from_file(filename, transcript) + speech_segment = SpeechSegment.from_file(audio_file, transcript) self._augmentation_pipeline.transform_audio(speech_segment) specgram, transcript_part = self._speech_featurizer.featurize( speech_segment, self._keep_transcription_text) From 59079f5c001cc6aee33592fd2af5fd89e9bcce0c Mon Sep 17 00:00:00 2001 From: loongw Date: Wed, 7 Feb 2018 11:28:40 +0800 Subject: [PATCH 323/335] Cleanup worker processes every epoch end. --- data_utils/data.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 2a6e99b75..46298bf75 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -11,7 +11,6 @@ import multiprocessing import numpy as np import paddle.v2 as paddle from threading import local -import atexit from data_utils.utility import read_manifest from data_utils.utility import xmap_readers_mp from data_utils.augmentor.augmentation import AugmentationPipeline @@ -194,15 +193,18 @@ class DataGenerator(object): raise ValueError("Unknown shuffle method %s." % shuffle_method) # prepare batches - instance_reader = self._instance_reader_creator(manifest) + instance_reader, cleanup = self._instance_reader_creator(manifest) batch = [] - for instance in instance_reader(): - batch.append(instance) - if len(batch) == batch_size: + try: + for instance in instance_reader(): + batch.append(instance) + if len(batch) == batch_size: + yield self._padding_batch(batch, padding_to, flatten) + batch = [] + if len(batch) >= min_batch_size: yield self._padding_batch(batch, padding_to, flatten) - batch = [] - if len(batch) >= min_batch_size: - yield self._padding_batch(batch, padding_to, flatten) + finally: + cleanup() self._epoch += 1 return batch_reader @@ -280,10 +282,7 @@ class DataGenerator(object): lambda instance: self.process_utterance(instance["audio_filepath"], instance["text"]), reader, self._num_threads, 4096) - # register callback to main process - atexit.register(cleanup_callback) - - return reader + return reader, cleanup_callback def _padding_batch(self, batch, padding_to=-1, flatten=False): """ From b6a042c40f80785b5b04c2d3761b186734314fa1 Mon Sep 17 00:00:00 2001 From: "chester.kuo" Date: Wed, 28 Feb 2018 14:30:03 +0800 Subject: [PATCH 324/335] Fix Aishell manifest creation mix with train/dev/test set --- data/aishell/aishell.py | 1 + 1 file changed, 1 insertion(+) diff --git a/data/aishell/aishell.py b/data/aishell/aishell.py index 17786b5d4..d3340fc6f 100644 --- a/data/aishell/aishell.py +++ b/data/aishell/aishell.py @@ -52,6 +52,7 @@ def create_manifest(data_dir, manifest_path_prefix): data_types = ['train', 'dev', 'test'] for type in data_types: + del json_lines[:] audio_dir = os.path.join(data_dir, 'wav', type) for subfolder, _, filelist in sorted(os.walk(audio_dir)): for fname in filelist: From 20533f5383838c2ed1d8c185fe0b8c1cbdc92b91 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 21 Mar 2018 15:54:58 +0800 Subject: [PATCH 325/335] Fix invlid download url. 
--- models/lm/download_lm_ch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/lm/download_lm_ch.sh b/models/lm/download_lm_ch.sh index c719f9a9f..d94e286b8 100644 --- a/models/lm/download_lm_ch.sh +++ b/models/lm/download_lm_ch.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL=http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e +URL=http://cloud.dlnel.org/filepub/?uuid=5cd1688e-78d9-4b9e-9c2f-6f104bd5b518 MD5="29e02312deb2e59b3c8686c7966d4fe3" TARGET=./zh_giga.no_cna_cmn.prune01244.klm From 9aef6d2b6b6f76714c39ea61cd9ecd7aa6e5a5ad Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 24 May 2018 11:09:23 +0800 Subject: [PATCH 326/335] Fix the issues link in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9480eccb9..6c92f1ec9 100644 --- a/README.md +++ b/README.md @@ -539,4 +539,4 @@ We compare the training time with 1, 2, 4, 8, 16 Tesla K40m GPUs (with a subset ## Questions and Help -You are welcome to submit questions and bug reports in [Github Issues](https://github.com/PaddlePaddle/models/issues). You are also welcome to contribute to this project. +You are welcome to submit questions and bug reports in [Github Issues](https://github.com/PaddlePaddle/DeepSpeech/issues). You are also welcome to contribute to this project. From 4f9a210761d6234acbbc88afdd718a667d52f5d0 Mon Sep 17 00:00:00 2001 From: eric_xu Date: Mon, 4 Jun 2018 10:20:53 +0800 Subject: [PATCH 327/335] update README_cn.md (#207) * add README_cn.md * update fix * delete url --- README_cn.md | 546 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 546 insertions(+) create mode 100644 README_cn.md diff --git a/README_cn.md b/README_cn.md new file mode 100644 index 000000000..c046349fe --- /dev/null +++ b/README_cn.md @@ -0,0 +1,546 @@ +# 语音识别: DeepSpeech2 + +*语音识别: DeepSpeech2*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别(ASR)引擎的开源项目,具体原理参考这篇论文[Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf)。 +我们的愿景是为语音识别在工业应用和学术研究上,提供易于使用、高效和可扩展的工具,包括训练,推理,测试模块,以及分布式的[PaddleCloud](https://github.com/PaddlePaddle/cloud)训练和demo部署。同时,我们还将发布一些预训练好的英语和普通话模型。 + +## 目录 +- [安装](#安装) +- [开始](#开始) +- [数据准备](#数据准备) +- [训练模型](#训练模型) +- [数据增强管道](#数据增强管道) +- [推断和评估](#推断和评估) +- [在Docker容器上运行](#在Docker容器上运行) +- [分布式云训练](#分布式云训练) +- [超参数调整](#超参数调整) +- [训练汉语语言](#训练汉语语言) +- [用自己的声音尝试现场演示](#用自己的声音尝试现场演示) +- [发布模型](#发布模型) +- [试验和基准](#试验和基准) +- [问题和帮助](#问题和帮助) + +## 安装 +为了避免环境配置问题,强烈建议在[Docker容器上运行](#在Docker容器上运行),另外请按照下面的指南安装依赖项。 + +### 前提 +- 只支持Python 2.7 +- PaddlePaddle最新版本(请参考[安装指南](https://github.com/PaddlePaddle/Paddle#installation)) + +### 安装 +- 请确保以下库或工具已安装完毕:`pkg-config`, `flac`, `ogg`, `vorbis`, `boost` 和 `swig`, 以上可以通过`apt-get`安装: + +```bash +sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig +``` + +- 为剩下的依赖项运行安装脚本 + +```bash +git clone https://github.com/PaddlePaddle/DeepSpeech.git +cd DeepSpeech +sh setup.sh +``` + +## 开始 + +`./examples`里的一些shell脚本将帮助我们在一些公开数据集(比如:[LibriSpeech](http://www.openslr.org/12/), [Aishell](http://www.openslr.org/33)) 进行快速尝试,包括了数据准备,模型训练,案例推断和模型评价。阅读这些例子将帮助你理解如何应用你的数据集。 + +`./examples`目录中的一些脚本配置使用了8个GPU。如果你没有8个可用的GPU,请修改`CUDA_VISIBLE_DEVICES`和`--trainer_count`。如果你没有可用的GPU,请设置`--use_gpu`为False,这样程序会用CPU代替GPU。另外如果发生内存不足的问题,减小`--batch_size`即可。 + +让我们先看看[LibriSpeech dataset](http://www.openslr.org/12/)小样本集的例子。 + +- 转到目录 + + ```bash + cd examples/tiny + ``` + + 
注意这仅仅是LibriSpeech一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。 +- 准备数据 + + ```bash + sh run_data.sh + ``` + + 运行`run_data.sh`脚本将会下载数据集,产出manifests文件,收集一些归一化需要的统计信息并建立词表。当数据准备完成之后,下载完的数据(仅有LibriSpeech一部分)在`~/.cache/paddle/dataset/speech/libri`中;其对应的manifest文件,均值标准差和词表文件在`./data/tiny`中。在第一次执行的时候一定要执行这个脚本,在接下来所有的实验中我们都会用到这个数据集。 +- 训练你自己的ASR模型 + + ```bash + sh run_train.sh + ``` + + `run_train.sh`将会启动训练任务,训练日志会打印到stdout,并且模型每个时期(epoch)的检查点都会保存到`./checkpoints/tiny`目录中。这些检查点可以用来恢复训练,推断,评价和部署。 +- 用已有的模型进行案例推断 + + ```bash + sh run_infer.sh + ``` + + `run_infer.sh`将会利用训完的模型展现一些(默认10个)样本语音到文本的解码结果。由于当前模型只使用了LibriSpeech一部分数据集训练,因此性能可能不会太好。为了看到更好模型上的表现,你可以下载一个已训练好的模型(用完整的LibriSpeech训练了好几天)来做推断。 + + ```bash + sh run_infer_golden.sh + ``` +- 评价一个已经存在的模型 + + ```bash + sh run_test.sh + ``` + + `run_test.sh`能够利用误字率(或字符错误率)来评价模型。类似的,你可以下载一个完全训练好的模型来测试它的性能: + + ```bash + sh run_test_golden.sh + ``` + +更多细节会在接下来的章节中阐述。祝你在*语音识别: DeepSpeech2*ASR引擎学习中过得愉快! + + +## 数据准备 + +### 生成Manifest + +*语音识别: DeepSpeech2*接受文本**manifest**文件作为数据接口。manifest文件包含了一系列语音数据,其中每一行代表一个json格式的音频元数据(比如文件路径,描述,时长)。具体格式如下: + +``` +{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0001.flac", "duration": 3.275, "text": "stuff it into you his belly counselled him"} +{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0007.flac", "duration": 4.275, "text": "a cold lucid indifference reigned in his soul"} +``` + +如果你要使用自定义数据,你只需要按照以上格式生成自己的manifest文件即可。训练,推断以及其他所有模块都能够根据manifest文件获取到音频数据,包括他们的元数据。 + +关于如何生成manifest文件,请参考`data/librispeech/librispeech.py`。该脚本将会下载LibriSpeech数据集并生成manifest文件。 + +### 计算均值和标准差用于归一化 + +为了对音频特征进行z-score归一化(零均值,单位标准差),我们必须预估一些训练样本特征的均值和标准差: + +```bash +python tools/compute_mean_std.py \ +--num_samples 2000 \ +--specgram_type linear \ +--manifest_paths data/librispeech/manifest.train \ +--output_path data/librispeech/mean_std.npz +``` + +以上这段代码会计算在`data/librispeech/manifest.train`路径中,2000个随机采样音频剪辑的功率谱特征均值和标准差,并将结果保存在`data/librispeech/mean_std.npz`中,方便以后使用。 + +### 建立词表 + +转换录音为索引用于训练,解码,再将一系列索引转换为文本等操作需要一个可能会出现字符集合的词表。`tools/build_vocab.py`脚本将生成这种基于字符的词表。 + +```bash +python tools/build_vocab.py \ +--count_threshold 0 \ +--vocab_path data/librispeech/eng_vocab.txt \ +--manifest_paths data/librispeech/manifest.train +``` + +他将`data/librispeech/manifest.train`目录中的所有录音文本写入词表文件`data/librispeeech/eng_vocab.txt`,并且没有词汇截断(`--count_threshold 0`)。 + +### 更多帮助 + +获得更多帮助: + +```bash +python data/librispeech/librispeech.py --help +python tools/compute_mean_std.py --help +python tools/build_vocab.py --help +``` + +## 训练模型 + +`train.py`是训练模块的主要调用者。使用示例如下。 + +- 开始使用8片GPU训练: + + ``` + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py --trainer_count 8 + ``` + +- 开始使用16片GPU训练: + + ``` + python train.py --use_gpu False --trainer_count 16 + ``` + +- 从检查点恢复训练: + + ``` + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ + python train.py \ + --init_model_path CHECKPOINT_PATH_TO_RESUME_FROM + ``` + +获得更多帮助: + +```bash +python train.py --help +``` +或参考 `example/librispeech/run_train.sh`. 
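在正式开始训练之前,也可以先用几行 Python 脚本粗略检查 manifest 文件的内容,例如统计条目数量和总时长。下面是一个最小示意(并非本项目源码;字段名取自上文 manifest 格式说明,`max_duration` 阈值只是随意假设的数值,仅用于演示过滤):

```python
import io
import json

def summarize_manifest(manifest_path, max_duration=27.0):
    """Roughly summarize a manifest file (one JSON object per line)."""
    total, kept, hours = 0, 0, 0.0
    with io.open(manifest_path, encoding='utf8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            entry = json.loads(line)  # keys: audio_filepath, duration, text
            total += 1
            # max_duration is an arbitrary threshold, only for illustration
            if entry["duration"] <= max_duration:
                kept += 1
                hours += entry["duration"] / 3600.0
    print("%d utterances in total, %d kept (<= %.1f s), %.2f hours of audio"
          % (total, kept, max_duration, hours))

summarize_manifest("data/librispeech/manifest.train")
```

这样可以在训练开始前快速发现 manifest 为空、时长字段异常等数据问题。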
+ +## 数据增强管道 + +数据增强是用来提升深度学习性能的非常有效的技术。我们通过在原始音频中添加小随机扰动(标签不变转换)获得新音频来增强我们的语音数据。你不必自己合成,因为数据增强已经嵌入到数据提供者中,能在训练模型时每个epoch中随机的合成音频。 + +目前提供六个可选的增强组件供选择,配置并插入处理流水线。 + + - 音量扰动 + - 速度扰动 + - 移动扰动 + - 在线贝叶斯归一化 + - 噪声干扰(需要背景噪音的音频文件) + - 脉冲响应(需要脉冲音频文件) + +为了让训练模块知道需要哪些增强组件以及它们的处理顺序,我们需要事先准备一个[JSON](http://www.json.org/)格式的*扩展配置文件*。例如: + +``` +[{ + "type": "speed", + "params": {"min_speed_rate": 0.95, + "max_speed_rate": 1.05}, + "prob": 0.6 +}, +{ + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 0.8 +}] +``` + +当`trainer.py`的`--augment_conf_file`参数被设置为上述示例配置文件的路径时,每个epoch中的每个音频片段都将被处理。首先,均匀随机采样速率会有60%的概率在0.95和1.05之间对音频片段进行速度扰动。然后,音频片段有80%的概率在时间上被挪移,挪移偏差值是-5毫秒和5毫秒之间的随机采样。最后,这个新合成的音频片段将被传送给特征提取器,以用于接下来的训练。 + +有关其他配置实例,请参考`conf/augmenatation.config.example`. + +使用数据增强技术时要小心,由于扩大了训练和测试集的差异,不恰当的增强会对训练模型不利。 + +## 推断和评价 + +### 准备语言模型 + +提升解码器的性能需要准备语言模型。我们准备了两种语言模型(有损压缩)供用户下载和尝试。一个是英语模型,另一个是普通话模型。用户可以执行以下命令来下载已经训练好的语言模型: + +```bash +cd models/lm +sh download_lm_en.sh +sh download_lm_ch.sh +``` + +如果你想训练自己更好的语言模型,请参考[KenLM](https://github.com/kpu/kenlm)获取教程。在这里,我们提供一些技巧来展示我们如何准备我们的英语和普通话模型。开始训练的时候,你可以参考这些技巧。 + + +#### 英语语言模型 + +英语语料库来自[Common Crawl Repository](http://commoncrawl.org),您可以从[statmt](http://data.statmt.org/ngrams/deduped_en)下载它。我们使用en.00部分来训练我们的英语语言模型。训练前有一些预处理步骤如下: + + * 不在\[A-Za-z0-9\s'\](\s表示空白字符)中的字符将被删除,阿拉伯数字被转换为英文数字,比如“1000”转换为one thousand。 + * 重复的空白字符被压缩为一个,并且开始的空白字符将被删除。请注意,所有的录音都是小写字母,因此所有字符都转换为小写字母。 + * 选择前40万个最常用的单词来建立词表,其余部分将被替换为“UNKNOWNWORD”。 + +现在预处理完成了,我们得到一个干净的语料库来训练语言模型。我们发布的语言模型版本使用了参数“-o 5 --prune 0 1 1 1 1”来训练。“-o 5”表示语言模型的最大order为5。“--prune 0 1 1 1 1”表示每个order的计数阈值,更具体地说,它将第2个以及更高的order修剪为单个。为了节省磁盘存储空间,我们将使用参数“-a 22 -q 8 -b 8”将arpa文件转换为“trie”二进制文件。“-a”表示在“trie”中用于切分的指针的最高位数。“-q -b”是概率和退避的量化参数。 + +#### 普通话语言模型 + +与英语语言模型不同的是,普通话语言模型是基于字符的,其中每一位都是中文汉字。我们使用内部语料库来训练发布的汉语语言模型。该语料库包含数十亿汉字。预处理阶段与英语语言模型差别很小,主要步骤包括: + + * 删除开始和结尾的空白字符。 + * 删除英文标点和中文标点。 + * 在两个字符之间插入空白字符。 + +请注意,发布的语言模型只包含中文简体字。预处理完成后,我们开始训练语言模型。这个小的语言模型训练关键参数是“-o 5 --prune 0 1 2 4 4”,“-o 5”是针对大语言模型。请参考上面的部分了解每个参数的含义。我们还使用默认设置将arpa文件转换为二进制文件。 + +### 语音到文本推断 + +推断模块调用者为`infer.py`,可以用来推断,解码,以及给一些给定音频剪辑进行可视化语音到文本的结果。这有助于对ASR模型的性能进行直观和定性的评估。 + +- GPU版本的推断: + + ```bash + CUDA_VISIBLE_DEVICES=0 python infer.py --trainer_count 1 + ``` + +- CPU版本的推断: + + ```bash + python infer.py --use_gpu False --trainer_count 12 + ``` + +我们提供两种类型的CTC解码器:*CTC贪心解码器*和*CTC波束搜索解码器*。*CTC贪心解码器*是简单的最佳路径解码算法的实现,在每个时间步选择最可能的字符,因此是贪心的并且是局部最优的。[*CTC波束搜索解码器*](https://arxiv.org/abs/1408.2873)另外使用了启发式广度优先图搜索以达到近似全局最优; 它也需要预先训练的KenLM语言模型以获得更好的评分和排名。解码器类型可以用参数`--decoding_method`设置。 + +获得更多帮助: + +``` +python infer.py --help +``` +或参考`example/librispeech/run_infer.sh`. + +### 评估模型 + +要定量评估模型的性能,请运行: + +- 带GPU版本评估 + + ```bash + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python test.py --trainer_count 8 + ``` + +- CPU版本评估 + + ```bash + python test.py --use_gpu False --trainer_count 12 + ``` + +错误率(默认:误字率;可以用--error_rate_type设置)将被打印出来。 + +获得更多帮助: + +```bash +python test.py --help +``` +或参考`example/librispeech/run_test.sh`. 
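为了直观理解"CTC贪心解码器"所说的最佳路径解码,下面给出一个极简的示意实现:逐帧取概率最大的符号,合并相邻重复符号,再去掉空白符。它只用于说明原理,与本项目 `decoders` 模块中的实际实现无关;这里假设空白符位于概率矩阵的最后一列(索引为 `len(vocab_list)`):

```python
import numpy as np

def greedy_ctc_decode(probs_seq, vocab_list):
    """Best-path CTC decoding: argmax per frame, merge repeats, drop blanks.

    probs_seq: 2-D array, one row per frame, columns = vocab chars + blank.
    Assumption: the blank symbol occupies the last column (len(vocab_list)).
    """
    blank_id = len(vocab_list)
    best_path = np.argmax(np.asarray(probs_seq), axis=1)
    decoded = []
    prev = None
    for idx in best_path:
        if idx != prev and idx != blank_id:   # merge repeats, skip blanks
            decoded.append(vocab_list[idx])
        prev = idx
    return "".join(decoded)

# toy example: 3 symbols 'a', 'b', 'c' plus blank, 5 frames
probs = [[0.1, 0.1, 0.1, 0.7],   # blank
         [0.8, 0.1, 0.0, 0.1],   # a
         [0.7, 0.1, 0.1, 0.1],   # a (repeated, will be merged)
         [0.1, 0.1, 0.1, 0.7],   # blank
         [0.1, 0.7, 0.1, 0.1]]   # b
print(greedy_ctc_decode(probs, ["a", "b", "c"]))  # -> "ab"
```

束搜索解码器则在此基础上同时保留多条得分最高的候选前缀,并借助外部语言模型打分排序,因此能得到接近全局最优的结果,但需要事先完成上文所述的外部打分器初始化。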
+ +## 超参数调整 + +[*CTC波束搜索解码器*](https://arxiv.org/abs/1408.2873)的超参数$\alpha$(语言模型权重)和$\beta$(单词插入权重)对解码器的性能有非常显著的影响。当声学模型更新时,最好在验证集上重新调整它们。 + +`tools/tune.py`会进行2维网格查找超参数$\alpha$和$\beta$。您必须提供$\alpha$和$\beta$的范围,以及尝试的次数。 + +- 带GPU版的调整: + + ```bash + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ + python tools/tune.py \ + --trainer_count 8 \ + --alpha_from 1.0 \ + --alpha_to 3.2 \ + --num_alphas 45 \ + --beta_from 0.1 \ + --beta_to 0.45 \ + --num_betas 8 + ``` + +- CPU版的调整: + + ```bash + python tools/tune.py --use_gpu False + ``` +网格搜索将会在超参数空间的每个点处打印出WER(误字率)或者CER(字符错误率),并且可选择绘出误差曲面。合适的超参数范围应包括WER/CER误差表面的全局最小值,如下图所示。 + +
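这种二维网格搜索的核心逻辑本身很简单,大致相当于下面的示意代码(并非 `tools/tune.py` 的实现;其中 `toy_error_surface` 是一个假想的误差函数,用来代替"以 (α, β) 进行束搜索解码并统计 WER/CER"这一步,网格范围与上面命令行示例中的参数一致):

```python
import itertools
import numpy as np

# candidate grid, mirroring --alpha_from/--alpha_to/--num_alphas etc. above
cand_alphas = np.linspace(1.0, 3.2, 45)   # language model weight alpha
cand_betas = np.linspace(0.1, 0.45, 8)    # word insertion weight beta
params_grid = list(itertools.product(cand_alphas, cand_betas))

def toy_error_surface(alpha, beta):
    # hypothetical stand-in for "decode the validation batches with
    # (alpha, beta) and return the averaged WER/CER"
    return (alpha - 2.3) ** 2 + 4.0 * (beta - 0.3) ** 2 + 10.0

errors = [(toy_error_surface(a, b), a, b) for a, b in params_grid]
best_err, best_alpha, best_beta = min(errors)
print("best alpha = %.3f, best beta = %.3f, error = %.4f"
      % (best_alpha, best_beta, best_err))
```

实际调参时,只需把这个假想的误差函数换成"解码 + 统计错误率"的真实流程,并在多个批次上累计误差即可。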
(图:调整LibriSpeech的dev-clean集合的误差曲面示例)
+ +通常,如图所示,语言模型权重($\alpha$)的变化显著影响CTC波束搜索解码器的性能。更好的方法是首先调整多批数据(可指定数量)以找出适当的超参数范围,然后更改为整个验证集以进行精确调整。 + +调整之后,您可以在推理和评价模块中重置$\alpha$和$\beta$,以检查它们是否真的有助于提高ASR性能。更多帮助如下: + +```bash +python tune.py --help +``` +或参考`example/librispeech/run_tune.sh`. + +## 在Docker容器上运行 + +Docker是一个开源工具,用于在孤立的环境中构建,发布和运行分布式应用程序。此项目的Docker镜像已在[hub.docker.com](https://hub.docker.com)中提供,并安装了所有依赖项,其中包括预先构建的PaddlePaddle,CTC解码器以及其他必要的Python和第三方库。这个Docker映像需要NVIDIA GPU的支持,所以请确保它的可用性并已完成[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)的安装。 + +采取以下步骤来启动Docker镜像: + +- 下载Docker镜像 + +```bash +nvidia-docker pull paddlepaddle/deep_speech:latest-gpu +``` + +- git clone这个资源库 + +``` +git clone https://github.com/PaddlePaddle/DeepSpeech.git +``` + +- 运行Docker镜像 + +```bash +sudo nvidia-docker run -it -v $(pwd)/DeepSpeech:/DeepSpeech paddlepaddle/deep_speech:latest-gpu /bin/bash +``` + +现在返回并从[开始](#开始)部分开始,您可以在Docker容器中同样执行模型训练,推断和超参数调整。 + +## 分布式云训练 + +我们还为用户提供云训练模块[PaddleCloud](https://github.com/PaddlePaddle/cloud)以便用户进行集群训练,利用多台机器达到更快的训练速度。首先,请按照[PaddleCloud用法](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#%E4%B8%8B%E8%BD%BD%E5%B9%B6%E9%85%8D%E7%BD%AEpaddlecloud)安装PaddleCloud客户端并注册PaddleCloud账户。 + +请按照以下步骤提交训练任务: + +- 转到目录: + + ```bash + cd cloud + ``` +- 上传数据: + + 数据必须上传到PaddleCloud文件系统才能在云作业中访问。`pcloud_upload_data.sh`负责进行数据打包和上传: + + ```bash + sh pcloud_upload_data.sh + ``` + + 给定manifest文件,`pcloud_upload_data.sh`会进行以下处理: + + - 提取输入清单中列出的音频文件。 + - 将它们打包成指定数量的tar文件。 + - 将这些tar文件上传到PaddleCloud文件系统。 + - 通过用PaddleCloud文件系统路径替换本地文件系统路径来创建云manifest文件。云作业将通过新的manifest文件获取到音频文件的位置及其元信息。 + + 对于云训练模型来说以上步骤只需做一次。之后这些数据会在云文件系统上保持不变,并可在之后的任务中反复使用。 + + 有关参数的详细信息,请参考[在PaddleCloud上训练DeepSpeech2](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/cloud)。 + + - 配置训练参数 + + 在`pcloud_submit.sh`中配置云任务参数(例如`NUM_NODES`,`NUM_GPUS`,`CLOUD_TRAIN_DIR`,`JOB_NAME`等),然后在`pcloud_train.sh`中配置其他的超参数训练(和本地训练一样)。 + + 有关参数的详细信息,请参阅[在PaddleCloud上训练DeepSpeech2](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/cloud)。 + + + - 提交任务 + + 运行: + + ```bash + sh pcloud_submit.sh + ``` + 一个训练任务已经提交给PaddleCloud,并将任务名输出到控制台。 + + - 获取训练日志 + + 执行以下命令以列出你提交的所有任务以及它们的运行状态: + + ```bash + paddlecloud get jobs + ``` + + 运行此操作,将打印相应的任务日志。 + + ```bash + paddlecloud logs -n 10000 $REPLACED_WITH_YOUR_ACTUAL_JOB_NAME + ``` + +有关PaddleCloud用法的更多信息,请参阅[PaddleCloud用法](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md#提交任务)。 + +有关PaddleCloud的DeepSpeech2训练的更多信息,请参阅 +[Train DeepSpeech2 on PaddleCloud](https://github.com/PaddlePaddle/DeepSpeech/tree/develop/cloud). 
+ +## 训练普通话语言 + +普通话语言训练与英语训练的关键步骤相同,我们提供了一个```examples/aishell```中Aishell的普通话训练例子。如上所述,请执行```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh```和```sh run_infer.sh```做相应的数据准备,训练,测试和推断。我们还准备了一个预训练过的模型(执行./models/aishell/download_model.sh下载)供用户使用```run_infer_golden.sh```和```run_test_golden.sh```来。请注意,与英语语言模型不同,普通话语言模型是基于汉字的,请运行```tools/tune.py```来查找最佳设置。 + +##用自己的声音尝试现场演示 + +到目前为止,一个ASR模型已经训练完毕,并且进行了定性测试(`infer.py`)和用现有的音频文件进行定量测试(`test.py`)。但目前还没有用你自己的声音进行测试。`deploy/demo_server.py`和`deploy/demo_client.py`能够快速构建一个利用训完的模型,对ASR引擎进行实时演示系统,使你能够用自己的语音测试和演示。 + +要启动演示服务,请在控制台中运行: + +```bash +CUDA_VISIBLE_DEVICES=0 \ +python deploy/demo_server.py \ +--trainer_count 1 \ +--host_ip localhost \ +--host_port 8086 +``` + +对于运行demo客户端的机器(可能不是同一台机器),请在继续之前执行以下安装。 + +比如,对于MAC OS X机器: + +```bash +brew install portaudio +pip install pyaudio +pip install pynput +``` + +然后启动客户端,请在另一个控制台中运行: + +```bash +CUDA_VISIBLE_DEVICES=0 \ +python -u deploy/demo_client.py \ +--host_ip 'localhost' \ +--host_port 8086 +``` + +现在,在客户端控制台中,按下`whitespace`键,按住并开始讲话。讲话完毕请释放该键以让控制台中显示的语音到文本结果。要退出客户端,只需按`ESC`键。 + +请注意,`deploy/demo_client.py`必须在带麦克风设备的机器上运行,而`deploy/demo_server.py`可以在没有任何录音硬件的情况下运行,例如任何远程服务器机器。如果服务器和客户端使用两台独立的机器运行,只需要注意将`host_ip`和`host_port`参数设置为实际可访问的IP地址和端口。如果它们在单台机器上运行,则不用作任何处理。 + +请参考`examples/mandarin/run_demo_server.sh`,它将首先下载一个预先训练过的普通话模型(用3000小时的内部语音数据训练),然后用模型启动演示服务器。通过运行`examples/mandarin/run_demo_client.sh`,你可以说普通话来测试它。如果您想尝试其他模型,只需更新脚本中的`--model_path`参数即可。 + +获得更多帮助: + +```bash +python deploy/demo_server.py --help +python deploy/demo_client.py --help +``` + +## 发布模型 + +#### 语音模型发布 + +语种 | 模型名 | 训练数据 | 语音时长 +:-----------: | :------------: | :----------: | -------: +English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h +Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [BaiduCN1.2k Model](http://cloud.dlnel.org/filepub/?uuid=499569a6-0025-4f40-83e6-1c99527431a6) | Baidu Internal Mandarin Dataset | 1204 h + +#### 语言模型发布 + +语言模型 | 训练数据 | 基于的字符 | 大小 | 描述 +:-------------:| :------------:| :-----: | -----: | :----------------- +[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' +[Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings +[Mandarin LM Large](http://cloud.dlnel.org/filepub/?uuid=245d02bb-cd01-4ebe-b079-b97be864ec37) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings + +## 实验和基准 + +#### 英语模型的基准测试结果(字错误率) + +测试集 | LibriSpeech Model | BaiduEN8K Model +:--------------------- | ---------------: | -------------------: +LibriSpeech Test-Clean | 6.85 | 5.41 +LibriSpeech Test-Other | 21.18 | 13.85 +VoxForge American-Canadian | 12.12 |   7.13 +VoxForge Commonwealth | 19.82 | 14.93 +VoxForge European | 30.15 | 18.64 +VoxForge Indian | 53.73 | 25.51 +Baidu Internal Testset  |   40.75 |   8.48 + +为了在VoxForge数据上重现基准测试结果,我们提供了一个脚本来下载数据并生成VoxForge方言manifest文件。请到```data/voxforge```执行````run_data.sh```来获取VoxForge方言manifest文件。请注意,VoxForge数据可能会持续更新,生成的清单文件可能与我们评估的清单文件有所不同。 + + +#### 普通话模型的基准测试结果(字符错误率) + +测试集 | BaiduCN1.2k Model +:--------------------- | -------------------: +Baidu Internal Testset | 12.64 + +#### 多GPU加速 + +我们对1,2,4,8,16个Tesla K40m GPU的训练时间(LibriSpeech样本的子集,其音频持续时间介于6.0和7.0秒之间)进行比较。它表明,已经实现了具有多个GPU的**近线性**加速。在下图中,训练的时间(以秒为单位)显示在蓝色条上。 + +
+ +| # of GPU | 加速比 | +| -------- | --------------: | +| 1 | 1.00 X | +| 2 | 1.97 X | +| 4 | 3.74 X | +| 8 | 6.21 X | +|16 | 10.70 X | + +`tools/profile.sh`提供了上述分析工具. + +## 问题和帮助 + +欢迎您在[Github问题](https://github.com/PaddlePaddle/models/issues)中提交问题和bug。也欢迎您为这个项目做出贡献。 + From 45a867b7308032dc70c7e6dddf4045b7f791d83b Mon Sep 17 00:00:00 2001 From: eric_xu Date: Mon, 4 Jun 2018 10:42:11 +0800 Subject: [PATCH 328/335] fix README_cn.md --- README_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_cn.md b/README_cn.md index c046349fe..f8ed7f100 100644 --- a/README_cn.md +++ b/README_cn.md @@ -227,7 +227,7 @@ sh download_lm_en.sh sh download_lm_ch.sh ``` -如果你想训练自己更好的语言模型,请参考[KenLM](https://github.com/kpu/kenlm)获取教程。在这里,我们提供一些技巧来展示我们如何准备我们的英语和普通话模型。开始训练的时候,你可以参考这些技巧。 +如果你想训练自己更好的语言模型,请参考[KenLM](https://github.com/kpu/kenlm)获取教程。在这里,我们提供一些技巧来展示我们如何准备我们的英语和普通话模型。开始训练的时候,你可以参考这些技巧。 #### 英语语言模型 From 893b81ea7657ecd4b01b20e2b7c524ac7c763f46 Mon Sep 17 00:00:00 2001 From: eric_xu Date: Mon, 4 Jun 2018 10:50:34 +0800 Subject: [PATCH 329/335] Update README_cn.md --- README_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_cn.md b/README_cn.md index f8ed7f100..d4d80ecc9 100644 --- a/README_cn.md +++ b/README_cn.md @@ -9,7 +9,7 @@ - [数据准备](#数据准备) - [训练模型](#训练模型) - [数据增强管道](#数据增强管道) -- [推断和评估](#推断和评估) +- [推断和评价](#推断和评价) - [在Docker容器上运行](#在Docker容器上运行) - [分布式云训练](#分布式云训练) - [超参数调整](#超参数调整) From 445d84ee2635e0a5fcd7c5f41fc7f2962c1fc4c4 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 23 Apr 2019 08:35:37 +0000 Subject: [PATCH 330/335] Update lm & acoustic models' link --- README.md | 14 +++++++------- README_cn.md | 27 +++++++++++++-------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 6c92f1ec9..6e7dd2a44 100644 --- a/README.md +++ b/README.md @@ -486,18 +486,18 @@ python deploy/demo_client.py --help Language | Model Name | Training Data | Hours of Speech :-----------: | :------------: | :----------: | -------: -English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h -Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | [BaiduCN1.2k Model](http://cloud.dlnel.org/filepub/?uuid=499569a6-0025-4f40-83e6-1c99527431a6) | Baidu Internal Mandarin Dataset | 1204 h +English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model.tar.gz) | Baidu Internal English Dataset | 8628 h +Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h #### Language Model Released Language Model | Training Data | Token-based | Size | Descriptions :-------------:| :------------:| :-----: | -----: | :----------------- -[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | 
[CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' -[Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings -[Mandarin LM Large](http://cloud.dlnel.org/filepub/?uuid=245d02bb-cd01-4ebe-b079-b97be864ec37) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings +[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' +[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings +[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings ## Experiments and Benchmarks diff --git a/README_cn.md b/README_cn.md index d4d80ecc9..c2304dcb6 100644 --- a/README_cn.md +++ b/README_cn.md @@ -55,14 +55,14 @@ sh setup.sh cd examples/tiny ``` - 注意这仅仅是LibriSpeech一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。 + 注意这仅仅是LibriSpeech一个小数据集的例子。如果你想尝试完整的数据集(可能需要花好几天来训练模型),请使用这个路径`examples/librispeech`。 - 准备数据 ```bash sh run_data.sh ``` - 运行`run_data.sh`脚本将会下载数据集,产出manifests文件,收集一些归一化需要的统计信息并建立词表。当数据准备完成之后,下载完的数据(仅有LibriSpeech一部分)在`~/.cache/paddle/dataset/speech/libri`中;其对应的manifest文件,均值标准差和词表文件在`./data/tiny`中。在第一次执行的时候一定要执行这个脚本,在接下来所有的实验中我们都会用到这个数据集。 + 运行`run_data.sh`脚本将会下载数据集,产出manifests文件,收集一些归一化需要的统计信息并建立词表。当数据准备完成之后,下载完的数据(仅有LibriSpeech一部分)在`~/.cache/paddle/dataset/speech/libri`中;其对应的manifest文件,均值标准差和词表文件在`./data/tiny`中。在第一次执行的时候一定要执行这个脚本,在接下来所有的实验中我们都会用到这个数据集。 - 训练你自己的ASR模型 ```bash @@ -163,7 +163,7 @@ python tools/build_vocab.py --help ``` python train.py --use_gpu False --trainer_count 16 ``` - + - 从检查点恢复训练: ``` @@ -233,7 +233,7 @@ sh download_lm_ch.sh #### 英语语言模型 英语语料库来自[Common Crawl Repository](http://commoncrawl.org),您可以从[statmt](http://data.statmt.org/ngrams/deduped_en)下载它。我们使用en.00部分来训练我们的英语语言模型。训练前有一些预处理步骤如下: - + * 不在\[A-Za-z0-9\s'\](\s表示空白字符)中的字符将被删除,阿拉伯数字被转换为英文数字,比如“1000”转换为one thousand。 * 重复的空白字符被压缩为一个,并且开始的空白字符将被删除。请注意,所有的录音都是小写字母,因此所有字符都转换为小写字母。 * 选择前40万个最常用的单词来建立词表,其余部分将被替换为“UNKNOWNWORD”。 @@ -414,7 +414,7 @@ sudo nvidia-docker run -it -v $(pwd)/DeepSpeech:/DeepSpeech paddlepaddle/deep_sp 一个训练任务已经提交给PaddleCloud,并将任务名输出到控制台。 - 获取训练日志 - + 执行以下命令以列出你提交的所有任务以及它们的运行状态: ```bash @@ -422,7 +422,7 @@ sudo nvidia-docker run -it -v $(pwd)/DeepSpeech:/DeepSpeech paddlepaddle/deep_sp ``` 运行此操作,将打印相应的任务日志。 - + ```bash paddlecloud logs -n 10000 $REPLACED_WITH_YOUR_ACTUAL_JOB_NAME ``` @@ -488,18 +488,18 @@ python deploy/demo_client.py --help 语种 | 模型名 | 训练数据 | 语音时长 :-----------: | :------------: | :----------: | -------: -English | [LibriSpeech Model](http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h -English | [BaiduEN8k Model](http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90) | Baidu Internal English Dataset | 8628 h -Mandarin | [Aishell Model](http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h -Mandarin | [BaiduCN1.2k Model](http://cloud.dlnel.org/filepub/?uuid=499569a6-0025-4f40-83e6-1c99527431a6) | Baidu Internal Mandarin Dataset | 1204 h +English | [LibriSpeech Model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_model.tar.gz) | [LibriSpeech Dataset](http://www.openslr.org/12/) | 960 h +English | [BaiduEN8k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model.tar.gz) | Baidu Internal English Dataset | 8628 h +Mandarin | [Aishell Model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model.tar.gz) | [Aishell Dataset](http://www.openslr.org/33/) | 151 h +Mandarin | [BaiduCN1.2k Model](https://deepspeech.bj.bcebos.com/demo_models/baidu_cn1.2k_model.tar.gz) | Baidu Internal Mandarin Dataset | 1204 h #### 语言模型发布 语言模型 | 训练数据 | 基于的字符 | 大小 | 描述 :-------------:| :------------:| :-----: | -----: | :----------------- -[English LM](http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned 
with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' -[Mandarin LM Small](http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings -[Mandarin LM Large](http://cloud.dlnel.org/filepub/?uuid=245d02bb-cd01-4ebe-b079-b97be864ec37) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings +[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1;
About 1.85 billion n-grams;
'trie' binary with '-a 22 -q 8 -b 8' +[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4;
About 0.13 billion n-grams;
'probing' binary with default settings +[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning;
About 3.7 billion n-grams;
'probing' binary with default settings ## 实验和基准 @@ -543,4 +543,3 @@ Baidu Internal Testset | 12.64 ## 问题和帮助 欢迎您在[Github问题](https://github.com/PaddlePaddle/models/issues)中提交问题和bug。也欢迎您为这个项目做出贡献。 - From 95e5d1f5ab2e762ebc03d5d6f51c40369b5b1d3d Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 11 Jun 2019 09:59:47 +0000 Subject: [PATCH 331/335] Update the urls in download scripts --- models/aishell/download_model.sh | 2 +- models/baidu_en8k/download_model.sh | 2 +- models/librispeech/download_model.sh | 2 +- models/lm/download_lm_ch.sh | 2 +- models/lm/download_lm_en.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/models/aishell/download_model.sh b/models/aishell/download_model.sh index 072fc6fa4..1c4be79fa 100644 --- a/models/aishell/download_model.sh +++ b/models/aishell/download_model.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL='http://cloud.dlnel.org/filepub/?uuid=61de63b9-6904-4809-ad95-0cc5104ab973' +URL='https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model.tar.gz' MD5=0ee83aa15fba421e5de8fc66c8feb350 TARGET=./aishell_model.tar.gz diff --git a/models/baidu_en8k/download_model.sh b/models/baidu_en8k/download_model.sh index 796b6cb9d..9ce672825 100644 --- a/models/baidu_en8k/download_model.sh +++ b/models/baidu_en8k/download_model.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL='http://cloud.dlnel.org/filepub/?uuid=37a1c211-ec47-494c-973c-31437a10ae90' +URL='https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model.tar.gz' MD5=5fe7639e720d51b3c3bdf7a1470c6272 TARGET=./baidu_en8k_model.tar.gz diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 305c082ad..123bcb818 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL='http://cloud.dlnel.org/filepub/?uuid=117cde63-cd59-4948-8b80-df782555f7d6' +URL='https://deepspeech.bj.bcebos.com/eng_models/librispeech_model.tar.gz' MD5=1f72d0c5591f453362f0caa09dd57618 TARGET=./librispeech_model.tar.gz diff --git a/models/lm/download_lm_ch.sh b/models/lm/download_lm_ch.sh index d94e286b8..0e4915262 100644 --- a/models/lm/download_lm_ch.sh +++ b/models/lm/download_lm_ch.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL=http://cloud.dlnel.org/filepub/?uuid=5cd1688e-78d9-4b9e-9c2f-6f104bd5b518 +URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm' MD5="29e02312deb2e59b3c8686c7966d4fe3" TARGET=./zh_giga.no_cna_cmn.prune01244.klm diff --git a/models/lm/download_lm_en.sh b/models/lm/download_lm_en.sh index d131636e8..cc8d32035 100644 --- a/models/lm/download_lm_en.sh +++ b/models/lm/download_lm_en.sh @@ -2,7 +2,7 @@ . ../../utils/utility.sh -URL=http://paddlepaddle.bj.bcebos.com/model_zoo/speech/common_crawl_00.prune01111.trie.klm +URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm MD5="099a601759d467cd0a8523ff939819c5" TARGET=./common_crawl_00.prune01111.trie.klm From f19a58c2bf402a3a2c1cd153f4f25f71527cb6f2 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 15 Aug 2019 17:15:31 +0800 Subject: [PATCH 332/335] Update README.md --- README.md | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/README.md b/README.md index 6e7dd2a44..dc2aca2bf 100644 --- a/README.md +++ b/README.md @@ -22,26 +22,7 @@ ## Installation -To avoid the trouble of environment setup, [running in Docker container](#running-in-docker-container) is highly recommended. 
-
-### Prerequisites
-- Python 2.7 only supported
-- PaddlePaddle the latest version (please refer to the [Installation Guide](https://github.com/PaddlePaddle/Paddle#installation))
-
-### Setup
-- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost` and `swig`, e.g. installing them via `apt-get`:
-
-```bash
-sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig
-```
-
-- Run the setup script for the remaining dependencies
-
-```bash
-git clone https://github.com/PaddlePaddle/DeepSpeech.git
-cd DeepSpeech
-sh setup.sh
-```
+Since this project was developed with the PaddlePaddle V2 API, which is no longer officially maintained, we suggest [running it in a Docker image](#running-in-docker-container) instead of building the environment from source code. We are going to release an update to the latest Paddle Fluid API very soon, so please keep an eye on this project.
 ## Getting Started

From 13ec49de9582a916bbaf596fa2b83f4dcf64d9b9 Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Thu, 15 Aug 2019 17:19:02 +0800
Subject: [PATCH 333/335] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dc2aca2bf..30ff61ad1 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@
 ## Installation
-Since this project was developed with the PaddlePaddle V2 API, which is no longer officially maintained, we suggest [running it in a Docker image](#running-in-docker-container) instead of building the environment from source code. We are going to release an update to the latest Paddle Fluid API very soon, so please keep an eye on this project.
+Since this project was developed with the PaddlePaddle V2 API, which is no longer officially maintained, we only support [running it in a Docker image](#running-in-docker-container) instead of building the environment from source code. We are going to release an update to the latest Paddle Fluid API very soon, so please keep an eye on this project.
 ## Getting Started

From b8a6d3b969496dc860c250bf7b4b92c77fadbe76 Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Thu, 15 Aug 2019 17:19:48 +0800
Subject: [PATCH 334/335] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 30ff61ad1..0dcf8b602 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@
 ## Installation
-Since this project was developed with the PaddlePaddle V2 API, which is no longer officially maintained, we only support [running it in a Docker image](#running-in-docker-container) instead of building the environment from source code. We are going to release an update to the latest Paddle Fluid API very soon, so please keep an eye on this project.
+Since this project was developed with the PaddlePaddle V2 API, which is no longer officially maintained, we only support [running it in a Docker container](#running-in-docker-container) instead of building the environment from source code. We are going to release an update to the latest Paddle Fluid API very soon, so please keep an eye on this project.
 ## Getting Started

From 334c10a4e68016ab2aacd0c759697d6360b8b41b Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Thu, 15 Aug 2019 18:23:58 +0800
Subject: [PATCH 335/335] Update README_cn.md

---
 README_cn.md | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/README_cn.md b/README_cn.md
index c2304dcb6..06bee58bf 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -1,6 +1,6 @@
-# 语音识别: DeepSpeech2
+# DeepSpeech2
-*语音识别: DeepSpeech2*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别(ASR)引擎的开源项目,具体原理参考这篇论文[Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf)。
+*DeepSpeech2* 是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别(ASR)引擎的开源项目,具体原理请参考这篇论文[Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf)。
 我们的愿景是为语音识别在工业应用和学术研究上,提供易于使用、高效和可扩展的工具,包括训练,推理,测试模块,以及分布式的[PaddleCloud](https://github.com/PaddlePaddle/cloud)训练和demo部署。同时,我们还将发布一些预训练好的英语和普通话模型。
 ## 目录
@@ -20,26 +20,8 @@
 - [问题和帮助](#问题和帮助)
 ## 安装
-为了避免环境配置问题,强烈建议在[Docker容器上运行](#在Docker容器上运行),另外请按照下面的指南安装依赖项。
-### 前提
-- 只支持Python 2.7
-- PaddlePaddle最新版本(请参考[安装指南](https://github.com/PaddlePaddle/Paddle#installation))
-
-### 安装
-- 请确保以下库或工具已安装完毕:`pkg-config`, `flac`, `ogg`, `vorbis`, `boost` 和 `swig`, 以上可以通过`apt-get`安装:
-
-```bash
-sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig
-```
-
-- 为剩下的依赖项运行安装脚本
-
-```bash
-git clone https://github.com/PaddlePaddle/DeepSpeech.git
-cd DeepSpeech
-sh setup.sh
-```
+因该项目基于 PaddlePaddle V2 API 开发,其已不再被官方维护,目前我们仅支持 [在 Docker 容器中运行该项目](#在Docker容器上运行),而不支持从源码构建环境。我们很快会将这个项目升级到最新的 Paddle Fluid API,请保持关注。
 ## 开始
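The installation sections rewritten by the patches above only point readers to the README's "Running in Docker container" section; the actual Docker commands are outside this excerpt. The snippet below is a minimal sketch of what starting the project in a prebuilt GPU container typically looks like. The image name `paddlepaddle/deep_speech:latest-gpu` and the `/DeepSpeech` mount path are assumptions for illustration, not values taken from these patches; use the names given in the repository's own Docker section.

```bash
# Minimal sketch, assuming a prebuilt image named paddlepaddle/deep_speech:latest-gpu
# and nvidia-docker for GPU passthrough. Adjust names to match the README's Docker section.
git clone https://github.com/PaddlePaddle/DeepSpeech.git

# Pull the assumed image and start a container with the checkout mounted at /DeepSpeech.
docker pull paddlepaddle/deep_speech:latest-gpu
nvidia-docker run -it \
    -v "$(pwd)"/DeepSpeech:/DeepSpeech \
    paddlepaddle/deep_speech:latest-gpu /bin/bash

# Inside the container, the usual entry points can then be used, e.g.:
#   cd /DeepSpeech && python train.py --help
```

Mounting the checkout into the container keeps scripts and data editable on the host, while the unmaintained PaddlePaddle V2 runtime stays pinned inside the image.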