From 0a3a840beef768da54db66d26ccb8476300833a3 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Wed, 19 May 2021 10:56:47 +0800
Subject: [PATCH] more decoding method (#618)

* more decoding method
* all decode method test scripts; result readme
* exp libri config
* parallel data scripts; more mask test; need pybind11 repo
* speed perturb config
* libri conf test set
---
 .gitignore | 2 +-
 deepspeech/exps/deepspeech2/model.py | 2 +-
 deepspeech/exps/u2/model.py | 4 +-
 examples/aishell/s0/local/data.sh | 25 ++++++-----
 examples/aishell/s1/README.md | 14 ++++++
 examples/aishell/s1/local/data.sh | 31 +++++++------
 examples/aishell/s1/local/test.sh | 44 ++++++++++++++-----
 examples/librispeech/README.md | 2 +-
 examples/librispeech/s1/README.md | 16 +++++++
 examples/librispeech/s1/conf/conformer.yaml | 4 +-
 examples/librispeech/s1/conf/transformer.yaml | 2 +-
 examples/librispeech/s1/local/test.sh | 44 ++++++++++++++-----
 examples/tiny/s1/run.sh | 0
 requirements.txt | 3 +-
 tests/mask_test.py | 2 +
 15 files changed, 140 insertions(+), 55 deletions(-)
 create mode 100644 examples/aishell/s1/README.md
 create mode 100644 examples/librispeech/s1/README.md
 mode change 100644 => 100755 examples/tiny/s1/run.sh

diff --git a/.gitignore b/.gitignore
index 9b225057..6fa37722 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 .DS_Store
 *.pyc
 .vscode
-*.log
+*log
 *.pdmodel
 *.pdiparams*
 *.zip
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 643936f1..04137419 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -168,7 +168,7 @@ class DeepSpeech2Trainer(Trainer):
 train_dataset,
 batch_sampler=batch_sampler,
 collate_fn=collate_fn,
- num_workers=config.data.num_workers, )
+ num_workers=config.data.num_workers)
 self.valid_loader = DataLoader(
 dev_dataset,
 batch_size=config.data.batch_size,
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 00f4f5ec..f166a071 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -450,7 +450,7 @@ class U2Tester(U2Trainer):
 logger.info(msg)

 # test meta results
- err_meta_path = os.path.splitext(self.args.checkpoint_path)[0] + '.err'
+ err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
 err_type_str = "{}".format(error_rate_type)
 with open(err_meta_path, 'w') as f:
 data = json.dumps({
@@ -471,6 +471,8 @@ class U2Tester(U2Trainer):
 errors_sum,
 "ref_len":
 len_refs,
+ "decode_method":
+ self.config.decoding.decoding_method,
 })
 f.write(data + '\n')
diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh
index c92152c7..2f09b14a 100755
--- a/examples/aishell/s0/local/data.sh
+++ b/examples/aishell/s0/local/data.sh
@@ -66,19 +66,22 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 # format manifest with tokenids, vocab size
 for dataset in train dev test; do
+ {
 python3 ${MAIN_ROOT}/utils/format_data.py \
- --feat_type "raw" \
- --cmvn_path "data/mean_std.json" \
- --unit_type "char" \
- --vocab_path="data/vocab.txt" \
- --manifest_path="data/manifest.${dataset}.raw" \
- --output_path="data/manifest.${dataset}"
- done
+ --feat_type "raw" \
+ --cmvn_path "data/mean_std.json" \
+ --unit_type "char" \
+ --vocab_path="data/vocab.txt" \
+ --manifest_path="data/manifest.${dataset}.raw" \
+ --output_path="data/manifest.${dataset}"

- if [ $? -ne 0 ]; then
- echo "Formt mnaifest failed. Terminated."
- exit 1
- fi
+ if [ $? -ne 0 ]; then
+ echo "Format manifest failed. Terminated."
+ exit 1
+ fi
+ } &
+ done
+ wait
 fi

 echo "Aishell data preparation done."
diff --git a/examples/aishell/s1/README.md b/examples/aishell/s1/README.md
new file mode 100644
index 00000000..9bfa45c9
--- /dev/null
+++ b/examples/aishell/s1/README.md
@@ -0,0 +1,14 @@
+# Aishell
+
+## Conformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | attention | - | 0.059858 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_greedy_search | - | 0.062311 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | - | 0.062196 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | - | 0.054694 |
+
+## Transformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| transformer | conf/transformer.yaml | spec_aug + shift | test | attention | - | - |
diff --git a/examples/aishell/s1/local/data.sh b/examples/aishell/s1/local/data.sh
index cdc352d1..c6abce3b 100755
--- a/examples/aishell/s1/local/data.sh
+++ b/examples/aishell/s1/local/data.sh
@@ -14,7 +14,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
 python3 ${TARGET_DIR}/aishell/aishell.py \
 --manifest_prefix="data/manifest" \
 --target_dir="${TARGET_DIR}/aishell"
-
+
 if [ $? -ne 0 ]; then
 echo "Prepare Aishell failed. Terminated."
 exit 1
@@ -33,7 +33,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
 --count_threshold=0 \
 --vocab_path="data/vocab.txt" \
 --manifest_paths "data/manifest.train.raw"
-
+
 if [ $? -ne 0 ]; then
 echo "Build vocabulary failed. Terminated."
 exit 1
@@ -56,7 +56,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 --num_samples=-1 \
 --num_workers=${num_workers} \
 --output_path="data/mean_std.json"
-
+
 if [ $? -ne 0 ]; then
 echo "Compute mean and stddev failed. Terminated."
 exit 1
@@ -67,19 +67,22 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 # format manifest with tokenids, vocab size
 for dataset in train dev test; do
+ {
 python3 ${MAIN_ROOT}/utils/format_data.py \
- --feat_type "raw" \
- --cmvn_path "data/mean_std.json" \
- --unit_type "char" \
- --vocab_path="data/vocab.txt" \
- --manifest_path="data/manifest.${dataset}.raw" \
- --output_path="data/manifest.${dataset}"
+ --feat_type "raw" \
+ --cmvn_path "data/mean_std.json" \
+ --unit_type "char" \
+ --vocab_path="data/vocab.txt" \
+ --manifest_path="data/manifest.${dataset}.raw" \
+ --output_path="data/manifest.${dataset}"
+
+ if [ $? -ne 0 ]; then
+ echo "Format manifest failed. Terminated."
+ exit 1
+ fi
+ } &
 done
-
- if [ $? -ne 0 ]; then
- echo "Formt mnaifest failed. Terminated."
- exit 1
- fi
+ wait
 fi

 echo "Aishell data preparation done."
diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh
index 6d113986..0dfabc6e 100755
--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@@ -21,17 +21,39 @@ ckpt_prefix=$2
 # exit 1
 #fi

-python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix}
-
-if [ $? -ne 0 ]; then
- echo "Failed in evaluation!"
- exit 1
-fi
+for type in attention ctc_greedy_search; do
+ echo "decoding ${type}"
+ batch_size=64
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done
+
+for type in ctc_prefix_beam_search attention_rescoring; do
+ echo "decoding ${type}"
+ batch_size=1
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done

 exit 0
diff --git a/examples/librispeech/README.md b/examples/librispeech/README.md
index f46749b7..c351c1f6 100644
--- a/examples/librispeech/README.md
+++ b/examples/librispeech/README.md
@@ -1,3 +1,3 @@
 # ASR
 * s0 is for deepspeech2
-* s1 is for U2
+* s1 is for transformer/conformer/U2
diff --git a/examples/librispeech/s1/README.md b/examples/librispeech/s1/README.md
new file mode 100644
index 00000000..8fbbe9d7
--- /dev/null
+++ b/examples/librispeech/s1/README.md
@@ -0,0 +1,16 @@
+# LibriSpeech
+
+## Conformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-all | attention | test-all 6.35 | 0.057117 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.35 | 0.030162 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | test-all 6.35 | 0.037910 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | test-all 6.35 | 0.037761 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | test-all 6.35 | 0.032115 |
+
+## Transformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| transformer | conf/transformer.yaml | spec_aug + shift | test-all | attention | test-all 6.98 | 0.066500 |
+| transformer | conf/transformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.98 | 0.036 |
diff --git a/examples/librispeech/s1/conf/conformer.yaml b/examples/librispeech/s1/conf/conformer.yaml
index f89f8265..fdc435b8 100644
--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@@ -2,7 +2,7 @@ data:
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
- test_manifest: data/manifest.test
+ test_manifest: data/manifest.test-clean
 vocab_filepath: data/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: 'data/bpe_unigram_5000'
@@ -14,7 +14,7 @@ data:
 min_output_len: 0.0 # tokens
 max_output_len: 400.0 # tokens
 min_output_input_ratio: 0.05
- max_output_input_ratio: 10.0
+ max_output_input_ratio: 10.0
 raw_wav: True # use raw_wav or kaldi feature
 specgram_type: fbank #linear, mfcc, fbank
 feat_dim: 80
diff --git a/examples/librispeech/s1/conf/transformer.yaml b/examples/librispeech/s1/conf/transformer.yaml
index 9014e5b8..a094b0fb 100644
--- a/examples/librispeech/s1/conf/transformer.yaml
+++ b/examples/librispeech/s1/conf/transformer.yaml
@@ -2,7 +2,7 @@ data:
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
- test_manifest: data/manifest.test
+ test_manifest: data/manifest.test-clean
 vocab_filepath: data/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: 'data/bpe_unigram_5000'
diff --git a/examples/librispeech/s1/local/test.sh b/examples/librispeech/s1/local/test.sh
index 240a63b0..8c323e00 100755
--- a/examples/librispeech/s1/local/test.sh
+++ b/examples/librispeech/s1/local/test.sh
@@ -21,17 +21,39 @@ ckpt_prefix=$2
 # exit 1
 #fi

-python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix}
-
-if [ $? -ne 0 ]; then
- echo "Failed in evaluation!"
- exit 1
-fi
+for type in attention ctc_greedy_search; do
+ echo "decoding ${type}"
+ batch_size=64
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done
+
+for type in ctc_prefix_beam_search attention_rescoring; do
+ echo "decoding ${type}"
+ batch_size=1
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done

 exit 0
diff --git a/examples/tiny/s1/run.sh b/examples/tiny/s1/run.sh
old mode 100644
new mode 100755
diff --git a/requirements.txt b/requirements.txt
index fc24e50e..315bb69e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ SoundFile==0.9.0.post1
 sox
 tensorboardX
 typeguard
-yacs
\ No newline at end of file
+yacs
+pybind11
diff --git a/tests/mask_test.py b/tests/mask_test.py
index ce1a673a..cd37a899 100644
--- a/tests/mask_test.py
+++ b/tests/mask_test.py
@@ -50,7 +50,9 @@ class TestU2Model(unittest.TestCase):

 def test_make_pad_mask(self):
 res = make_pad_mask(self.lengths)
+ res1 = make_non_pad_mask(self.lengths).logical_not()
 self.assertSequenceEqual(res.numpy().tolist(), self.pad_masks.tolist())
+ self.assertSequenceEqual(res.numpy().tolist(), res1.tolist())


 if __name__ == '__main__':
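
The new assertion in `tests/mask_test.py` relies on `make_pad_mask` and `make_non_pad_mask` being logical complements of each other. The snippet below is a minimal, self-contained sketch of that relationship using NumPy stand-ins; it is illustrative only and does not reproduce the repository's actual `deepspeech.modules.mask` implementations, which operate on paddle tensors.

```python
# Illustrative sketch only: standalone NumPy stand-ins for the pad-mask pair
# asserted in tests/mask_test.py (not the repository's paddle-based code).
import numpy as np


def make_pad_mask(lengths, max_len=None):
    """Boolean mask that is True at padded positions, False at valid ones."""
    lengths = np.asarray(lengths)
    max_len = int(lengths.max()) if max_len is None else max_len
    # A position is padding when its index is >= the sequence length.
    return np.arange(max_len)[None, :] >= lengths[:, None]


def make_non_pad_mask(lengths, max_len=None):
    """True at valid positions; the logical complement of make_pad_mask."""
    return ~make_pad_mask(lengths, max_len)


lengths = [5, 3, 2]
pad_mask = make_pad_mask(lengths)
# Mirrors the new test: make_non_pad_mask(lengths).logical_not() == make_pad_mask(lengths)
assert np.array_equal(pad_mask, ~make_non_pad_mask(lengths))
print(pad_mask.astype(int))
# [[0 0 0 0 0]
#  [0 0 0 1 1]
#  [0 0 1 1 1]]
```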