Update argument naming following Yibing's reviews.

pull/2/head
Xinghai Sun 7 years ago
parent 9571b6fc0e
commit e8f7a8fde1

@ -25,7 +25,7 @@ add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.")
add_arg('alpha', float, 0.36, "Coef of LM for beam search.")
add_arg('beta', float, 0.25, "Coef of WC for beam search.")
add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.")
add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.")
add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.")
add_arg('use_gpu', bool, True, "Use GPU or not.")
add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across "
"bi-directional RNNs. Not for GRU.")
@ -51,9 +51,9 @@ add_arg('model_path', str,
add_arg('lang_model_path', str,
'lm/data/common_crawl_00.prune01111.trie.klm',
"Filepath for language model.")
add_arg('decoder_method', str,
add_arg('decoding_method', str,
'ctc_beam_search',
"Decoder method. Options: ctc_beam_search, ctc_greedy",
"Decoding method. Options: ctc_beam_search, ctc_greedy",
choices = ['ctc_beam_search', 'ctc_greedy'])
add_arg('specgram_type', str,
'linear',
@ -160,7 +160,7 @@ def start_server():
feature = data_generator.process_utterance(filename, "")
result_transcript = ds2_model.infer_batch(
infer_data=[feature],
decoder_method=args.decoder_method,
decoding_method=args.decoding_method,
beam_alpha=args.alpha,
beam_beta=args.beta,
beam_size=args.beam_size,

@ -17,15 +17,15 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('batch_size', int, 128, "Minibatch size.")
add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).")
add_arg('beam_size', int, 500, "Beam search width.")
add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.")
add_arg('parallels_data', int, 12, "# of CPUs for data preprocessing.")
add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.")
add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.")
add_arg('num_conv_layers', int, 2, "# of convolution layers.")
add_arg('num_rnn_layers', int, 3, "# of recurrent layers.")
add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.")
add_arg('alpha', float, 0.36, "Coef of LM for beam search.")
add_arg('beta', float, 0.25, "Coef of WC for beam search.")
add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.")
add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.")
add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.")
add_arg('use_gpu', bool, True, "Use GPU or not.")
add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across "
"bi-directional RNNs. Not for GRU.")
@ -45,9 +45,9 @@ add_arg('model_path', str,
add_arg('lang_model_path', str,
'lm/data/common_crawl_00.prune01111.trie.klm',
"Filepath for language model.")
add_arg('decoder_method', str,
add_arg('decoding_method', str,
'ctc_beam_search',
"Decoder method. Options: ctc_beam_search, ctc_greedy",
"Decoding method. Options: ctc_beam_search, ctc_greedy",
choices = ['ctc_beam_search', 'ctc_greedy'])
add_arg('error_rate_type', str,
'wer',
@ -68,7 +68,7 @@ def evaluate():
mean_std_filepath=args.mean_std_path,
augmentation_config='{}',
specgram_type=args.specgram_type,
num_threads=args.parallels_data)
num_threads=args.num_proc_data)
batch_reader = data_generator.batch_reader_creator(
manifest_path=args.test_manifest,
batch_size=args.batch_size,
@ -90,14 +90,14 @@ def evaluate():
for infer_data in batch_reader():
result_transcripts = ds2_model.infer_batch(
infer_data=infer_data,
decoder_method=args.decoder_method,
decoding_method=args.decoding_method,
beam_alpha=args.alpha,
beam_beta=args.beta,
beam_size=args.beam_size,
cutoff_prob=args.cutoff_prob,
vocab_list=data_generator.vocab_list,
language_model_path=args.lang_model_path,
num_processes=args.parallels_bsearch)
num_processes=args.num_proc_bsearch)
target_transcripts = [
''.join([data_generator.vocab_list[token] for token in transcript])
for _, transcript in infer_data

@ -17,14 +17,14 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('num_samples', int, 10, "# of samples to infer.")
add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).")
add_arg('beam_size', int, 500, "Beam search width.")
add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.")
add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.")
add_arg('num_conv_layers', int, 2, "# of convolution layers.")
add_arg('num_rnn_layers', int, 3, "# of recurrent layers.")
add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.")
add_arg('alpha', float, 0.36, "Coef of LM for beam search.")
add_arg('beta', float, 0.25, "Coef of WC for beam search.")
add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.")
add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.")
add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.")
add_arg('use_gpu', bool, True, "Use GPU or not.")
add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across "
"bi-directional RNNs. Not for GRU.")
@ -44,9 +44,9 @@ add_arg('model_path', str,
'./checkpoints/params.latest.tar.gz',
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model.")
add_arg('decoder_method', str,
add_arg('decoding_method', str,
'ctc_beam_search',
"Decoder method. Options: ctc_beam_search, ctc_greedy",
"Decoding method. Options: ctc_beam_search, ctc_greedy",
choices = ['ctc_beam_search', 'ctc_greedy'])
add_arg('error_rate_type', str,
'wer',
@ -86,14 +86,14 @@ def infer():
share_rnn_weights=args.share_rnn_weights)
result_transcripts = ds2_model.infer_batch(
infer_data=infer_data,
decoder_method=args.decoder_method,
decoding_method=args.decoding_method,
beam_alpha=args.alpha,
beam_beta=args.beta,
beam_size=args.beam_size,
cutoff_prob=args.cutoff_prob,
vocab_list=data_generator.vocab_list,
language_model_path=args.lang_model_path,
num_processes=args.parallels_bsearch)
num_processes=args.num_proc_bsearch)
error_rate_func = cer if args.error_rate_type == 'cer' else wer
target_transcripts = [

@ -146,7 +146,7 @@ class DeepSpeech2Model(object):
# run inference
return self._loss_inferer.infer(input=infer_data)
def infer_batch(self, infer_data, decoder_method, beam_alpha, beam_beta,
def infer_batch(self, infer_data, decoding_method, beam_alpha, beam_beta,
beam_size, cutoff_prob, vocab_list, language_model_path,
num_processes):
"""Model inference. Infer the transcription for a batch of speech
@ -156,9 +156,9 @@ class DeepSpeech2Model(object):
consisting of a tuple of audio features and
transcription text (empty string).
:type infer_data: list
:param decoder_method: Decoding method name, 'ctc_greedy' or
'ctc_beam_search'.
:param decoder_method: string
:param decoding_method: Decoding method name, 'ctc_greedy' or
'ctc_beam_search'.
:param decoding_method: string
:param beam_alpha: Parameter associated with language model.
:type beam_alpha: float
:param beam_beta: Parameter associated with word count.
@ -190,13 +190,13 @@ class DeepSpeech2Model(object):
]
# run decoder
results = []
if decoder_method == "ctc_greedy":
if decoding_method == "ctc_greedy":
# best path decode
for i, probs in enumerate(probs_split):
output_transcription = ctc_greedy_decoder(
probs_seq=probs, vocabulary=vocab_list)
results.append(output_transcription)
elif decoder_method == "ctc_beam_search":
elif decoding_method == "ctc_beam_search":
# initialize external scorer
if self._ext_scorer == None:
self._ext_scorer = LmScorer(beam_alpha, beam_beta,
@ -217,8 +217,8 @@ class DeepSpeech2Model(object):
results = [result[0][1] for result in beam_search_results]
else:
raise ValueError("Decoder method [%s] is not supported." %
decoder_method)
raise ValueError("Decoding method [%s] is not supported." %
decoding_method)
return results
def _create_parameters(self, model_path=None):

@ -16,7 +16,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('batch_size', int, 256, "Minibatch size.")
add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).")
add_arg('num_passes', int, 200, "# of training epochs.")
add_arg('parallels_data', int, 12, "# of CPUs for data preprocessing.")
add_arg('num_proc_data', int, 12, "# of CPUs for data preprocessing.")
add_arg('num_conv_layers', int, 2, "# of convolution layers.")
add_arg('num_rnn_layers', int, 3, "# of recurrent layers.")
add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.")
@ -28,7 +28,7 @@ add_arg('min_duration', float, 0.0, "Shortest audio duration allowed.")
add_arg('use_sortagrad', bool, True, "Use SortaGrad or not.")
add_arg('use_gpu', bool, True, "Use GPU or not.")
add_arg('is_local', bool, True, "Use pserver or not.")
add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.")
add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.")
add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across "
"bi-directional RNNs. Not for GRU.")
add_arg('train_manifest', str,
@ -74,13 +74,13 @@ def train():
max_duration=args.max_duration,
min_duration=args.min_duration,
specgram_type=args.specgram_type,
num_threads=args.parallels_data)
num_threads=args.num_proc_data)
dev_generator = DataGenerator(
vocab_filepath=args.vocab_path,
mean_std_filepath=args.mean_std_path,
augmentation_config="{}",
specgram_type=args.specgram_type,
num_threads=args.parallels_data)
num_threads=args.num_proc_data)
train_batch_reader = train_generator.batch_reader_creator(
manifest_path=args.train_manifest,
batch_size=args.batch_size,

@ -18,7 +18,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('num_samples', int, 100, "# of samples to infer.")
add_arg('trainer_count', int, 8, "# of Trainers (CPUs or GPUs).")
add_arg('beam_size', int, 500, "Beam search width.")
add_arg('parallels_bsearch',int, 12, "# of CPUs for beam search.")
add_arg('num_proc_bsearch', int, 12, "# of CPUs for beam search.")
add_arg('num_conv_layers', int, 2, "# of convolution layers.")
add_arg('num_rnn_layers', int, 3, "# of recurrent layers.")
add_arg('rnn_layer_size', int, 2048, "# of recurrent cells per layer.")
@ -29,7 +29,7 @@ add_arg('alpha_to', float, 0.36, "Where alpha ends tuning with.")
add_arg('beta_from', float, 0.05, "Where beta starts tuning from.")
add_arg('beta_to', float, 0.36, "Where beta ends tuning with.")
add_arg('cutoff_prob', float, 0.99, "Cutoff probability for pruning.")
add_arg('use_gru', bool, False, "Use GRUs instead of Simple RNNs.")
add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.")
add_arg('use_gpu', bool, True, "Use GPU or not.")
add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across "
"bi-directional RNNs. Not for GRU.")
@ -104,14 +104,14 @@ def tune():
for alpha, beta in params_grid:
result_transcripts = ds2_model.infer_batch(
infer_data=tune_data,
decoder_method='ctc_beam_search',
decoding_method='ctc_beam_search',
beam_alpha=alpha,
beam_beta=beta,
beam_size=args.beam_size,
cutoff_prob=args.cutoff_prob,
vocab_list=data_generator.vocab_list,
language_model_path=args.lang_model_path,
num_processes=args.parallels_bsearch)
num_processes=args.num_proc_bsearch)
wer_sum, num_ins = 0.0, 0
for target, result in zip(target_transcripts, result_transcripts):
wer_sum += wer(target, result)

Loading…
Cancel
Save