From a0843941281f833010157f8f8680fe7a1a8fc2dd Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 3 Nov 2017 14:52:10 +0800 Subject: [PATCH] Add doc and adjust some codes. --- data_utils/data.py | 4 ++-- infer.py | 4 +--- test.py | 4 +--- tools/tune.py | 4 +--- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index edd4047e..70ee6fba 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -103,8 +103,8 @@ class DataGenerator(object): :type filename: basestring | file :param transcript: Transcription text. :type transcript: basestring - :return: Tuple of audio feature tensor and list of token ids for - transcription. + :return: Tuple of audio feature tensor and data of transcription part, + where transcription part could be token ids or text. :rtype: tuple of (2darray, list) """ if filename.startswith('tar:'): diff --git a/infer.py b/infer.py index 74524602..9ac3e632 100644 --- a/infer.py +++ b/infer.py @@ -103,9 +103,7 @@ def infer(): num_processes=args.num_proc_bsearch) error_rate_func = cer if args.error_rate_type == 'cer' else wer - target_transcripts = [ - transcript for _, transcript in infer_data - ] + target_transcripts = [transcript for _, transcript in infer_data] for target, result in zip(target_transcripts, result_transcripts): print("\nTarget Transcription: %s\nOutput Transcription: %s" % (target, result)) diff --git a/test.py b/test.py index 5466f960..63fc4f65 100644 --- a/test.py +++ b/test.py @@ -104,9 +104,7 @@ def evaluate(): vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) - target_transcripts = [ - transcript for _, transcript in infer_data - ] + target_transcripts = [transcript for _, transcript in infer_data] for target, result in zip(target_transcripts, result_transcripts): error_sum += error_rate_func(target, result) num_ins += 1 diff --git a/tools/tune.py b/tools/tune.py index 99ffb5f5..966029a8 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -164,9 +164,7 @@ def tune(): for i in xrange(len(infer_data)) ] - target_transcripts = [ - transcript for _, transcript in infer_data - ] + target_transcripts = [transcript for _, transcript in infer_data] num_ins += len(target_transcripts) # grid search