Add docs and adjust some code.

pull/2/head
yangyaming 7 years ago
parent 39dbcb4dfb
commit a084394128

@ -103,8 +103,8 @@ class DataGenerator(object):
:type filename: basestring | file :type filename: basestring | file
:param transcript: Transcription text. :param transcript: Transcription text.
:type transcript: basestring :type transcript: basestring
:return: Tuple of audio feature tensor and list of token ids for :return: Tuple of audio feature tensor and data of transcription part,
transcription. where transcription part could be token ids or text.
:rtype: tuple of (2darray, list) :rtype: tuple of (2darray, list)
""" """
if filename.startswith('tar:'): if filename.startswith('tar:'):

@ -103,9 +103,7 @@ def infer():
num_processes=args.num_proc_bsearch) num_processes=args.num_proc_bsearch)
error_rate_func = cer if args.error_rate_type == 'cer' else wer error_rate_func = cer if args.error_rate_type == 'cer' else wer
target_transcripts = [ target_transcripts = [transcript for _, transcript in infer_data]
transcript for _, transcript in infer_data
]
for target, result in zip(target_transcripts, result_transcripts): for target, result in zip(target_transcripts, result_transcripts):
print("\nTarget Transcription: %s\nOutput Transcription: %s" % print("\nTarget Transcription: %s\nOutput Transcription: %s" %
(target, result)) (target, result))

@ -104,9 +104,7 @@ def evaluate():
vocab_list=vocab_list, vocab_list=vocab_list,
language_model_path=args.lang_model_path, language_model_path=args.lang_model_path,
num_processes=args.num_proc_bsearch) num_processes=args.num_proc_bsearch)
target_transcripts = [ target_transcripts = [transcript for _, transcript in infer_data]
transcript for _, transcript in infer_data
]
for target, result in zip(target_transcripts, result_transcripts): for target, result in zip(target_transcripts, result_transcripts):
error_sum += error_rate_func(target, result) error_sum += error_rate_func(target, result)
num_ins += 1 num_ins += 1

@ -164,9 +164,7 @@ def tune():
for i in xrange(len(infer_data)) for i in xrange(len(infer_data))
] ]
target_transcripts = [ target_transcripts = [transcript for _, transcript in infer_data]
transcript for _, transcript in infer_data
]
num_ins += len(target_transcripts) num_ins += len(target_transcripts)
# grid search # grid search

Loading…
Cancel
Save