diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 00f4f5ec4..f166a071e 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -450,7 +450,7 @@ class U2Tester(U2Trainer): logger.info(msg) # test meta results - err_meta_path = os.path.splitext(self.args.checkpoint_path)[0] + '.err' + err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err' err_type_str = "{}".format(error_rate_type) with open(err_meta_path, 'w') as f: data = json.dumps({ @@ -471,6 +471,8 @@ class U2Tester(U2Trainer): errors_sum, "ref_len": len_refs, + "decode_method": + self.config.decoding.decoding_method, }) f.write(data + '\n') diff --git a/examples/aishell/s1/README.md b/examples/aishell/s1/README.md new file mode 100644 index 000000000..9bfa45c95 --- /dev/null +++ b/examples/aishell/s1/README.md @@ -0,0 +1,14 @@ +# Aishell + +## Conformer +| Model | Config | Augmentation | Test set | Decode method | Loss | WER | +| --- | --- | --- | --- | --- | --- | --- | +| conformer | conf/conformer.yaml | spec_aug + shift | test | attention | - | 0.059858 | +| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_greedy_search | - | 0.062311 | +| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | - | 0.062196 | +| conformer | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | - | 0.054694 | + +## Transformer +| Model | Config | Augmentation | Test set | Decode method | Loss | WER | +| --- | --- | --- | --- | --- | --- | --- | +| transformer | conf/transformer.yaml | spec_aug + shift | test | attention | - | - | diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh index 6d1139862..0dfabc6e4 100755 --- a/examples/aishell/s1/local/test.sh +++ b/examples/aishell/s1/local/test.sh @@ -21,17 +21,39 @@ ckpt_prefix=$2 # exit 1 #fi -python3 -u ${BIN_DIR}/test.py \ ---device ${device} \ ---nproc 1 \ ---config ${config_path} \ ---result_file ${ckpt_prefix}.rsl \ ---checkpoint_path 
${ckpt_prefix} - -if [ $? -ne 0 ]; then - echo "Failed in evaluation!" - exit 1 -fi +for type in attention ctc_greedy_search; do + echo "decoding ${type}" + batch_size=64 + python3 -u ${BIN_DIR}/test.py \ + --device ${device} \ + --nproc 1 \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi +done + +for type in ctc_prefix_beam_search attention_rescoring; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --device ${device} \ + --nproc 1 \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi +done exit 0 diff --git a/examples/librispeech/README.md b/examples/librispeech/README.md index f46749b7f..c351c1f65 100644 --- a/examples/librispeech/README.md +++ b/examples/librispeech/README.md @@ -1,3 +1,3 @@ # ASR * s0 is for deepspeech2 -* s1 is for U2 +* s1 is for transformer/conformer/U2 diff --git a/examples/librispeech/s1/README.md b/examples/librispeech/s1/README.md index e69de29bb..8fbbe9d77 100644 --- a/examples/librispeech/s1/README.md +++ b/examples/librispeech/s1/README.md @@ -0,0 +1,16 @@ +# LibriSpeech + +## Conformer +| Model | Config | Augmentation | Test set | Decode method | Loss | WER | +| --- | --- | --- | --- | --- | --- | --- | +| conformer | conf/conformer.yaml | spec_aug + shift | test-all | attention | test-all 6.35 | 0.057117 | +| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.35 | 0.030162 | +| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | test-all 6.35 | 0.037910 | +| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | 
ctc_prefix_beam_search | test-all 6.35 | 0.037761 | +| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | test-all 6.35 | 0.032115 | + +## Transformer +| Model | Config | Augmentation | Test set | Decode method | Loss | WER | +| --- | --- | --- | --- | --- | --- | --- | +| transformer | conf/transformer.yaml | spec_aug + shift | test-all | attention | test-all 6.98 | 0.066500 | +| transformer | conf/transformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.98 | 0.036 | diff --git a/examples/librispeech/s1/conf/conformer.yaml b/examples/librispeech/s1/conf/conformer.yaml index c83b0aeb8..94e1b2d48 100644 --- a/examples/librispeech/s1/conf/conformer.yaml +++ b/examples/librispeech/s1/conf/conformer.yaml @@ -14,7 +14,7 @@ data: min_output_len: 0.0 # tokens max_output_len: 400.0 # tokens min_output_input_ratio: 0.05 - max_output_input_ratio: 10.0 + max_output_input_ratio: .inf raw_wav: True # use raw_wav or kaldi feature specgram_type: fbank #linear, mfcc, fbank feat_dim: 80 @@ -77,7 +77,7 @@ model: training: n_epoch: 120 accum_grad: 8 - global_grad_clip: 5.0 + global_grad_clip: 3.0 optim: adam optim_conf: lr: 0.004 diff --git a/examples/librispeech/s1/local/test.sh b/examples/librispeech/s1/local/test.sh index 890f95967..8c323e002 100755 --- a/examples/librispeech/s1/local/test.sh +++ b/examples/librispeech/s1/local/test.sh @@ -28,7 +28,7 @@ for type in attention ctc_greedy_search; do --device ${device} \ --nproc 1 \ --config ${config_path} \ - --result_file ${ckpt_prefix}.rsl \ + --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} @@ -45,7 +45,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do --device ${device} \ --nproc 1 \ --config ${config_path} \ - --result_file ${ckpt_prefix}.rsl \ + --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ --opts decoding.decoding_method ${type} 
decoding.batch_size ${batch_size}