From 0a3a840beef768da54db66d26ccb8476300833a3 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Wed, 19 May 2021 10:56:47 +0800
Subject: [PATCH] more decoding method (#618)

* more decoding method
* all decode method test scripts; result readme
* exp libri config
* parallel data scripts; more mask test; need pybind11 repo
* speed perturb config
* libri conf test set
---
 .gitignore | 2 +-
 deepspeech/exps/deepspeech2/model.py | 2 +-
 deepspeech/exps/u2/model.py | 4 +-
 examples/aishell/s0/local/data.sh | 25 ++++++-----
 examples/aishell/s1/README.md | 14 ++++++
 examples/aishell/s1/local/data.sh | 31 +++++++------
 examples/aishell/s1/local/test.sh | 44 ++++++++++++++-----
 examples/librispeech/README.md | 2 +-
 examples/librispeech/s1/README.md | 16 +++++++
 examples/librispeech/s1/conf/conformer.yaml | 4 +-
 examples/librispeech/s1/conf/transformer.yaml | 2 +-
 examples/librispeech/s1/local/test.sh | 44 ++++++++++++++-----
 examples/tiny/s1/run.sh | 0
 requirements.txt | 3 +-
 tests/mask_test.py | 2 +
 15 files changed, 140 insertions(+), 55 deletions(-)
 create mode 100644 examples/aishell/s1/README.md
 create mode 100644 examples/librispeech/s1/README.md
 mode change 100644 => 100755 examples/tiny/s1/run.sh

diff --git a/.gitignore b/.gitignore
index 9b225057..6fa37722 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 .DS_Store
 *.pyc
 .vscode
-*.log
+*log
 *.pdmodel
 *.pdiparams*
 *.zip
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 643936f1..04137419 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -168,7 +168,7 @@ class DeepSpeech2Trainer(Trainer):
 train_dataset,
 batch_sampler=batch_sampler,
 collate_fn=collate_fn,
- num_workers=config.data.num_workers, )
+ num_workers=config.data.num_workers)
 self.valid_loader = DataLoader(
 dev_dataset,
 batch_size=config.data.batch_size,
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 00f4f5ec..f166a071 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -450,7 +450,7 @@ class U2Tester(U2Trainer):
 logger.info(msg)

 # test meta results
- err_meta_path = os.path.splitext(self.args.checkpoint_path)[0] + '.err'
+ err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
 err_type_str = "{}".format(error_rate_type)
 with open(err_meta_path, 'w') as f:
 data = json.dumps({
@@ -471,6 +471,8 @@ class U2Tester(U2Trainer):
 errors_sum,
 "ref_len":
 len_refs,
+ "decode_method":
+ self.config.decoding.decoding_method,
 })
 f.write(data + '\n')
diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh
index c92152c7..2f09b14a 100755
--- a/examples/aishell/s0/local/data.sh
+++ b/examples/aishell/s0/local/data.sh
@@ -66,19 +66,22 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 # format manifest with tokenids, vocab size
 for dataset in train dev test; do
+ {
 python3 ${MAIN_ROOT}/utils/format_data.py \
- --feat_type "raw" \
- --cmvn_path "data/mean_std.json" \
- --unit_type "char" \
- --vocab_path="data/vocab.txt" \
- --manifest_path="data/manifest.${dataset}.raw" \
- --output_path="data/manifest.${dataset}"
- done
+ --feat_type "raw" \
+ --cmvn_path "data/mean_std.json" \
+ --unit_type "char" \
+ --vocab_path="data/vocab.txt" \
+ --manifest_path="data/manifest.${dataset}.raw" \
+ --output_path="data/manifest.${dataset}"

- if [ $? -ne 0 ]; then
- echo "Formt mnaifest failed. Terminated."
- exit 1
- fi
+ if [ $? -ne 0 ]; then
+ echo "Format manifest failed. Terminated."
+ exit 1
+ fi
+ } &
+ done
+ wait
 fi

 echo "Aishell data preparation done."
diff --git a/examples/aishell/s1/README.md b/examples/aishell/s1/README.md
new file mode 100644
index 00000000..9bfa45c9
--- /dev/null
+++ b/examples/aishell/s1/README.md
@@ -0,0 +1,14 @@
+# Aishell
+
+## Conformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | attention | - | 0.059858 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_greedy_search | - | 0.062311 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | - | 0.062196 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | - | 0.054694 |
+
+## Transformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| transformer | conf/transformer.yaml | spec_aug + shift | test | attention | - | - |
diff --git a/examples/aishell/s1/local/data.sh b/examples/aishell/s1/local/data.sh
index cdc352d1..c6abce3b 100755
--- a/examples/aishell/s1/local/data.sh
+++ b/examples/aishell/s1/local/data.sh
@@ -14,7 +14,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
 python3 ${TARGET_DIR}/aishell/aishell.py \
 --manifest_prefix="data/manifest" \
 --target_dir="${TARGET_DIR}/aishell"
-
+
 if [ $? -ne 0 ]; then
 echo "Prepare Aishell failed. Terminated."
 exit 1
@@ -33,7 +33,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
 --count_threshold=0 \
 --vocab_path="data/vocab.txt" \
 --manifest_paths "data/manifest.train.raw"
-
+
 if [ $? -ne 0 ]; then
 echo "Build vocabulary failed. Terminated."
 exit 1
@@ -56,7 +56,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 --num_samples=-1 \
 --num_workers=${num_workers} \
 --output_path="data/mean_std.json"
-
+
 if [ $? -ne 0 ]; then
 echo "Compute mean and stddev failed. Terminated."
 exit 1
@@ -67,19 +67,22 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 # format manifest with tokenids, vocab size
 for dataset in train dev test; do
+ {
 python3 ${MAIN_ROOT}/utils/format_data.py \
- --feat_type "raw" \
- --cmvn_path "data/mean_std.json" \
- --unit_type "char" \
- --vocab_path="data/vocab.txt" \
- --manifest_path="data/manifest.${dataset}.raw" \
- --output_path="data/manifest.${dataset}"
+ --feat_type "raw" \
+ --cmvn_path "data/mean_std.json" \
+ --unit_type "char" \
+ --vocab_path="data/vocab.txt" \
+ --manifest_path="data/manifest.${dataset}.raw" \
+ --output_path="data/manifest.${dataset}"
+
+ if [ $? -ne 0 ]; then
+ echo "Format manifest failed. Terminated."
+ exit 1
+ fi
+ } &
 done
-
- if [ $? -ne 0 ]; then
- echo "Formt mnaifest failed. Terminated."
- exit 1
- fi
+ wait
 fi

 echo "Aishell data preparation done."
diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh
index 6d113986..0dfabc6e 100755
--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@@ -21,17 +21,39 @@ ckpt_prefix=$2
 # exit 1
 #fi

-python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix}
-
-if [ $? -ne 0 ]; then
- echo "Failed in evaluation!"
- exit 1
-fi
+for type in attention ctc_greedy_search; do
+ echo "decoding ${type}"
+ batch_size=64
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done
+
+for type in ctc_prefix_beam_search attention_rescoring; do
+ echo "decoding ${type}"
+ batch_size=1
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done

 exit 0
diff --git a/examples/librispeech/README.md b/examples/librispeech/README.md
index f46749b7..c351c1f6 100644
--- a/examples/librispeech/README.md
+++ b/examples/librispeech/README.md
@@ -1,3 +1,3 @@
 # ASR
 * s0 is for deepspeech2
-* s1 is for U2
+* s1 is for transformer/conformer/U2
diff --git a/examples/librispeech/s1/README.md b/examples/librispeech/s1/README.md
new file mode 100644
index 00000000..8fbbe9d7
--- /dev/null
+++ b/examples/librispeech/s1/README.md
@@ -0,0 +1,16 @@
+# LibriSpeech
+
+## Conformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-all | attention | test-all 6.35 | 0.057117 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.35 | 0.030162 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | test-all 6.35 | 0.037910 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | test-all 6.35 | 0.037761 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | test-all 6.35 | 0.032115 |
+
+## Transformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| transformer | conf/transformer.yaml | spec_aug + shift | test-all | attention | test-all 6.98 | 0.066500 |
+| transformer | conf/transformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.98 | 0.036 |
diff --git a/examples/librispeech/s1/conf/conformer.yaml b/examples/librispeech/s1/conf/conformer.yaml
index f89f8265..fdc435b8 100644
--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@@ -2,7 +2,7 @@ data:
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
- test_manifest: data/manifest.test
+ test_manifest: data/manifest.test-clean
 vocab_filepath: data/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: 'data/bpe_unigram_5000'
@@ -14,7 +14,7 @@ data:
 min_output_len: 0.0 # tokens
 max_output_len: 400.0 # tokens
 min_output_input_ratio: 0.05
- max_output_input_ratio: 10.0
+ max_output_input_ratio: 10.0
 raw_wav: True # use raw_wav or kaldi feature
 specgram_type: fbank #linear, mfcc, fbank
 feat_dim: 80
diff --git a/examples/librispeech/s1/conf/transformer.yaml b/examples/librispeech/s1/conf/transformer.yaml
index 9014e5b8..a094b0fb 100644
--- a/examples/librispeech/s1/conf/transformer.yaml
+++ b/examples/librispeech/s1/conf/transformer.yaml
@@ -2,7 +2,7 @@ data:
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
- test_manifest: data/manifest.test
+ test_manifest: data/manifest.test-clean
 vocab_filepath: data/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: 'data/bpe_unigram_5000'
diff --git a/examples/librispeech/s1/local/test.sh b/examples/librispeech/s1/local/test.sh
index 240a63b0..8c323e00 100755
--- a/examples/librispeech/s1/local/test.sh
+++ b/examples/librispeech/s1/local/test.sh
@@ -21,17 +21,39 @@ ckpt_prefix=$2
 # exit 1
 #fi

-python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix}
-
-if [ $? -ne 0 ]; then
- echo "Failed in evaluation!"
- exit 1
-fi
+for type in attention ctc_greedy_search; do
+ echo "decoding ${type}"
+ batch_size=64
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done
+
+for type in ctc_prefix_beam_search attention_rescoring; do
+ echo "decoding ${type}"
+ batch_size=1
+ python3 -u ${BIN_DIR}/test.py \
+ --device ${device} \
+ --nproc 1 \
+ --config ${config_path} \
+ --result_file ${ckpt_prefix}.${type}.rsl \
+ --checkpoint_path ${ckpt_prefix} \
+ --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+ if [ $? -ne 0 ]; then
+ echo "Failed in evaluation!"
+ exit 1
+ fi
+done

 exit 0
diff --git a/examples/tiny/s1/run.sh b/examples/tiny/s1/run.sh
old mode 100644
new mode 100755
diff --git a/requirements.txt b/requirements.txt
index fc24e50e..315bb69e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ SoundFile==0.9.0.post1
 sox
 tensorboardX
 typeguard
-yacs
\ No newline at end of file
+yacs
+pybind11
diff --git a/tests/mask_test.py b/tests/mask_test.py
index ce1a673a..cd37a899 100644
--- a/tests/mask_test.py
+++ b/tests/mask_test.py
@@ -50,7 +50,9 @@ class TestU2Model(unittest.TestCase):

 def test_make_pad_mask(self):
 res = make_pad_mask(self.lengths)
+ res1 = make_non_pad_mask(self.lengths).logical_not()
 self.assertSequenceEqual(res.numpy().tolist(), self.pad_masks.tolist())
+ self.assertSequenceEqual(res.numpy().tolist(), res1.tolist())


 if __name__ == '__main__':
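
The new assertion in `tests/mask_test.py` relies on `make_pad_mask` and `make_non_pad_mask` being logical complements of each other. The snippet below is a minimal, self-contained sketch of that relationship using NumPy stand-ins; it is illustrative only and does not reproduce the repository's actual `deepspeech.modules.mask` implementations, which operate on paddle tensors.

```python
# Illustrative sketch only: standalone NumPy stand-ins for the pad-mask pair
# asserted in tests/mask_test.py (not the repository's paddle-based code).
import numpy as np


def make_pad_mask(lengths, max_len=None):
    """Boolean mask that is True at padded positions, False at valid ones."""
    lengths = np.asarray(lengths)
    max_len = int(lengths.max()) if max_len is None else max_len
    # A position is padding when its index is >= the sequence length.
    return np.arange(max_len)[None, :] >= lengths[:, None]


def make_non_pad_mask(lengths, max_len=None):
    """True at valid positions; the logical complement of make_pad_mask."""
    return ~make_pad_mask(lengths, max_len)


lengths = [5, 3, 2]
pad_mask = make_pad_mask(lengths)
# Mirrors the new test: make_non_pad_mask(lengths).logical_not() == make_pad_mask(lengths)
assert np.array_equal(pad_mask, ~make_non_pad_mask(lengths))
print(pad_mask.astype(int))
# [[0 0 0 0 0]
#  [0 0 0 1 1]
#  [0 0 1 1 1]]
```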