From 9d5eb74066f9af016abb6c6c5c45440c257b6f77 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 22 Sep 2021 12:49:22 +0000 Subject: [PATCH] fix decode json file --- deepspeech/exps/deepspeech2/model.py | 4 +-- deepspeech/exps/u2/model.py | 4 +-- deepspeech/exps/u2_kaldi/model.py | 4 +-- deepspeech/exps/u2_st/model.py | 4 +-- utils/README.md | 3 ++- utils/dump_manifest.py | 0 utils/filter.py | 37 +++++++++------------------- 7 files changed, 22 insertions(+), 34 deletions(-) mode change 100644 => 100755 utils/dump_manifest.py mode change 100644 => 100755 utils/filter.py diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 646f6f236..79a676345 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -18,8 +18,8 @@ from collections import defaultdict from contextlib import nullcontext from pathlib import Path from typing import Optional -import jsonlines +import jsonlines import numpy as np import paddle from paddle import distributed as dist @@ -306,7 +306,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): len_refs += len_ref num_ins += 1 if fout: - fout.write({"utt": utt, "ref", target, "hyp": result}) + fout.write({"utt": utt, "ref": target, "hyp": result}) logger.info(f"Utt: {utt}") logger.info(f"Ref: {target}") logger.info(f"Hyp: {result}") diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index f1970334d..5cb0962a7 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -21,8 +21,8 @@ from collections import OrderedDict from contextlib import nullcontext from pathlib import Path from typing import Optional -import jsonlines +import jsonlines import numpy as np import paddle from paddle import distributed as dist @@ -467,7 +467,7 @@ class U2Tester(U2Trainer): len_refs += len_ref num_ins += 1 if fout: - fout.write({"utt": utt, "ref", target, "hyp": result}) + fout.write({"utt": utt, "ref": target, "hyp": result}) logger.info(f"Utt: {utt}") 
logger.info(f"Ref: {target}") logger.info(f"Hyp: {result}") diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index 00d780817..d38afe25c 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -20,8 +20,8 @@ from collections import defaultdict from contextlib import nullcontext from pathlib import Path from typing import Optional -import jsonlines +import jsonlines import numpy as np import paddle from paddle import distributed as dist @@ -446,7 +446,7 @@ class U2Tester(U2Trainer): len_refs += len_ref num_ins += 1 if fout: - fout.write({"utt": utt, "ref", target, "hyp": result}) + fout.write({"utt": utt, "ref": target, "hyp": result}) logger.info(f"Utt: {utt}") logger.info(f"Ref: {target}") logger.info(f"Hyp: {result}") diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py index 86bb649bf..e4e70292c 100644 --- a/deepspeech/exps/u2_st/model.py +++ b/deepspeech/exps/u2_st/model.py @@ -20,8 +20,8 @@ from collections import defaultdict from contextlib import nullcontext from pathlib import Path from typing import Optional -import jsonlines +import jsonlines import numpy as np import paddle from paddle import distributed as dist @@ -480,7 +480,7 @@ class U2STTester(U2STTrainer): len_refs += len(target.split()) num_ins += 1 if fout: - fout.write({"utt": utt, "ref", target, "hyp": result}) + fout.write({"utt": utt, "ref": target, "hyp": result}) logger.info(f"Utt: {utt}") logger.info(f"Ref: {target}") logger.info(f"Hyp: {result}") diff --git a/utils/README.md b/utils/README.md index d48faf699..163be850f 100644 --- a/utils/README.md +++ b/utils/README.md @@ -1,3 +1,4 @@ # Utils -* [kaldi utils](https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/egs/wsj/s5/utils) \ No newline at end of file +* [kaldi utils](https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/egs/wsj/s5/utils) +* [espnet utils](https://github.com/espnet/espnet/tree/master/utils) diff --git a/utils/dump_manifest.py 
b/utils/dump_manifest.py old mode 100644 new mode 100755 diff --git a/utils/filter.py b/utils/filter.py old mode 100644 new mode 100755 index d31eab4d7..e32e135fc --- a/utils/filter.py +++ b/utils/filter.py @@ -1,7 +1,5 @@ #!/usr/bin/env python3 - # Apache 2.0 - import argparse import codecs import sys @@ -12,15 +10,13 @@ is_python2 = sys.version_info[0] == 2 def get_parser(): parser = argparse.ArgumentParser( description="filter words in a text file", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( "--exclude", "-v", dest="exclude", action="store_true", - help="exclude filter words", - ) + help="exclude filter words", ) parser.add_argument("filt", type=str, help="filter list") parser.add_argument("infile", type=str, help="input file") return parser @@ -37,29 +33,20 @@ def filter_file(infile, filt, exclude): for line in vocabfile: vocab.add(line.strip()) - sys.stdout = codecs.getwriter("utf-8")( - sys.stdout if is_python2 else sys.stdout.buffer - ) + sys.stdout = codecs.getwriter("utf-8")(sys.stdout + if is_python2 else sys.stdout.buffer) with codecs.open(infile, "r", encoding="utf-8") as textfile: for line in textfile: if exclude: - print( - " ".join( - map( - lambda word: word if word not in vocab else "", - line.strip().split(), - ) - ) - ) + print(" ".join( + map( + lambda word: word if word not in vocab else "", + line.strip().split(), ))) else: - print( - " ".join( - map( - lambda word: word if word in vocab else "", - line.strip().split(), - ) - ) - ) + print(" ".join( + map( + lambda word: word if word in vocab else "", + line.strip().split(), ))) if __name__ == "__main__":