More decoding methods (#618)

* more decoding methods

* test scripts for all decode methods; results README

* LibriSpeech exp config

* parallel data scripts; more mask tests; needs the pybind11 repo

* speed perturb config

* LibriSpeech config: test set
Commit 0a3a840bee (parent 295f8bdad5), authored by Hui Zhang and committed via GitHub.

.gitignore (vendored)

@@ -1,7 +1,7 @@
 .DS_Store
 *.pyc
 .vscode
-*.log
+*log
 *.pdmodel
 *.pdiparams*
 *.zip

@@ -168,7 +168,7 @@ class DeepSpeech2Trainer(Trainer):
             train_dataset,
             batch_sampler=batch_sampler,
             collate_fn=collate_fn,
-            num_workers=config.data.num_workers, )
+            num_workers=config.data.num_workers)
         self.valid_loader = DataLoader(
             dev_dataset,
             batch_size=config.data.batch_size,

@@ -450,7 +450,7 @@ class U2Tester(U2Trainer):
         logger.info(msg)

         # test meta results
-        err_meta_path = os.path.splitext(self.args.checkpoint_path)[0] + '.err'
+        err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
         err_type_str = "{}".format(error_rate_type)
         with open(err_meta_path, 'w') as f:
             data = json.dumps({
@@ -471,6 +471,8 @@ class U2Tester(U2Trainer):
                     errors_sum,
                     "ref_len":
                     len_refs,
+                    "decode_method":
+                    self.config.decoding.decoding_method,
                 })
                 f.write(data + '\n')
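For context, the `.err` meta file written by `U2Tester` is a one-line JSON record placed next to the decoding result file; after this change it also records which decode method produced the result, so runs over the different decoding strategies can be told apart. A minimal standalone sketch of that behaviour — the helper name, the usage values, and any field not visible in the hunk are assumptions for illustration:

import json
import os

def write_test_meta(result_file, error_rate_type, errors_sum, len_refs,
                    decoding_method):
    # Derive the meta path from the result file (not the checkpoint path),
    # matching the change above, and record the decode method alongside
    # the error statistics.
    err_meta_path = os.path.splitext(result_file)[0] + '.err'
    with open(err_meta_path, 'w') as f:
        data = json.dumps({
            "error_rate_type": error_rate_type,  # e.g. 'cer' or 'wer'
            "errors_sum": errors_sum,
            "ref_len": len_refs,
            "decode_method": decoding_method,
        })
        f.write(data + '\n')

# Hypothetical usage: one meta file per (checkpoint, decode method) pair.
write_test_meta('exp/conformer/checkpoints/avg_20.attention.rsl',
                'cer', 1234, 20000, 'attention')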

@ -66,19 +66,22 @@ fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# format manifest with tokenids, vocab size # format manifest with tokenids, vocab size
for dataset in train dev test; do for dataset in train dev test; do
{
python3 ${MAIN_ROOT}/utils/format_data.py \ python3 ${MAIN_ROOT}/utils/format_data.py \
--feat_type "raw" \ --feat_type "raw" \
--cmvn_path "data/mean_std.json" \ --cmvn_path "data/mean_std.json" \
--unit_type "char" \ --unit_type "char" \
--vocab_path="data/vocab.txt" \ --vocab_path="data/vocab.txt" \
--manifest_path="data/manifest.${dataset}.raw" \ --manifest_path="data/manifest.${dataset}.raw" \
--output_path="data/manifest.${dataset}" --output_path="data/manifest.${dataset}"
done
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Formt mnaifest failed. Terminated." echo "Formt mnaifest failed. Terminated."
exit 1 exit 1
fi fi
} &
done
wait
fi fi
echo "Aishell data preparation done." echo "Aishell data preparation done."

@@ -0,0 +1,14 @@
# Aishell
## Conformer
| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- |
| conformer | conf/conformer.yaml | spec_aug + shift | test | attention | - | 0.059858 |
| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_greedy_search | - | 0.062311 |
| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | - | 0.062196 |
| conformer | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | - | 0.054694 |
## Transformer
| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- |
| transformer | conf/transformer.yaml | spec_aug + shift | test | attention | - | - |

@@ -67,19 +67,22 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # format manifest with tokenids, vocab size
     for dataset in train dev test; do
+    {
         python3 ${MAIN_ROOT}/utils/format_data.py \
         --feat_type "raw" \
         --cmvn_path "data/mean_std.json" \
         --unit_type "char" \
         --vocab_path="data/vocab.txt" \
         --manifest_path="data/manifest.${dataset}.raw" \
         --output_path="data/manifest.${dataset}"
-    done

         if [ $? -ne 0 ]; then
             echo "Formt mnaifest failed. Terminated."
             exit 1
         fi
+    } &
+    done
+    wait
 fi

 echo "Aishell data preparation done."

@@ -21,17 +21,39 @@ ckpt_prefix=$2
 #    exit 1
 #fi

-python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix}
-
-if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
-    exit 1
-fi
+for type in attention ctc_greedy_search; do
+    echo "decoding ${type}"
+    batch_size=64
+    python3 -u ${BIN_DIR}/test.py \
+    --device ${device} \
+    --nproc 1 \
+    --config ${config_path} \
+    --result_file ${ckpt_prefix}.${type}.rsl \
+    --checkpoint_path ${ckpt_prefix} \
+    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+    if [ $? -ne 0 ]; then
+        echo "Failed in evaluation!"
+        exit 1
+    fi
+done
+
+for type in ctc_prefix_beam_search attention_rescoring; do
+    echo "decoding ${type}"
+    batch_size=1
+    python3 -u ${BIN_DIR}/test.py \
+    --device ${device} \
+    --nproc 1 \
+    --config ${config_path} \
+    --result_file ${ckpt_prefix}.${type}.rsl \
+    --checkpoint_path ${ckpt_prefix} \
+    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+    if [ $? -ne 0 ]; then
+        echo "Failed in evaluation!"
+        exit 1
+    fi
+done

 exit 0
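The test script now decodes the same checkpoint once per method, writing one `.rsl` file per method and passing the method and batch size through `--opts` (batch size 64 for attention and ctc_greedy_search, 1 for the beam-search based methods). Since `yacs` is in the requirements, the `--opts` pairs are presumably folded into the config with `CfgNode.merge_from_list`; a minimal sketch of that mechanism, where the config keys mirror the script and everything else is illustrative:

from yacs.config import CfgNode

# Build a tiny decoding config and override it the way `--opts key value`
# pairs would be applied.
config = CfgNode()
config.decoding = CfgNode()
config.decoding.decoding_method = "attention"
config.decoding.batch_size = 64

opts = ["decoding.decoding_method", "ctc_prefix_beam_search",
        "decoding.batch_size", "1"]
config.merge_from_list(opts)  # values are parsed and type-checked by yacs

print(config.decoding.decoding_method)  # ctc_prefix_beam_search
print(config.decoding.batch_size)       # 1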

@@ -1,3 +1,3 @@
 # ASR
 * s0 is for deepspeech2
-* s1 is for U2
+* s1 is for transformer/conformer/U2

@@ -0,0 +1,16 @@
# LibriSpeech
## Conformer
| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- |
| conformer | conf/conformer.yaml | spec_aug + shift | test-all | attention | test-all 6.35 | 0.057117 |
| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.35 | 0.030162 |
| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | test-all 6.35 | 0.037910 |
| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | test-all 6.35 | 0.037761 |
| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | test-all 6.35 | 0.032115 |
## Transformer
| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- |
| transformer | conf/transformer.yaml | spec_aug + shift | test-all | attention | test-all 6.98 | 0.066500 |
| transformer | conf/transformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.98 | 0.036 |

@@ -2,7 +2,7 @@
 data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev
-  test_manifest: data/manifest.test
+  test_manifest: data/manifest.test-clean
   vocab_filepath: data/vocab.txt
   unit_type: 'spm'
   spm_model_prefix: 'data/bpe_unigram_5000'

@@ -2,7 +2,7 @@
 data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev
-  test_manifest: data/manifest.test
+  test_manifest: data/manifest.test-clean
   vocab_filepath: data/vocab.txt
   unit_type: 'spm'
   spm_model_prefix: 'data/bpe_unigram_5000'

@@ -21,17 +21,39 @@ ckpt_prefix=$2
 #    exit 1
 #fi

-python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
---config ${config_path} \
---result_file ${ckpt_prefix}.rsl \
---checkpoint_path ${ckpt_prefix}
-
-if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
-    exit 1
-fi
+for type in attention ctc_greedy_search; do
+    echo "decoding ${type}"
+    batch_size=64
+    python3 -u ${BIN_DIR}/test.py \
+    --device ${device} \
+    --nproc 1 \
+    --config ${config_path} \
+    --result_file ${ckpt_prefix}.${type}.rsl \
+    --checkpoint_path ${ckpt_prefix} \
+    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+    if [ $? -ne 0 ]; then
+        echo "Failed in evaluation!"
+        exit 1
+    fi
+done
+
+for type in ctc_prefix_beam_search attention_rescoring; do
+    echo "decoding ${type}"
+    batch_size=1
+    python3 -u ${BIN_DIR}/test.py \
+    --device ${device} \
+    --nproc 1 \
+    --config ${config_path} \
+    --result_file ${ckpt_prefix}.${type}.rsl \
+    --checkpoint_path ${ckpt_prefix} \
+    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+
+    if [ $? -ne 0 ]; then
+        echo "Failed in evaluation!"
+        exit 1
+    fi
+done

 exit 0

@@ -9,3 +9,4 @@ sox
 tensorboardX
 typeguard
 yacs
+pybind11

@@ -50,7 +50,9 @@ class TestU2Model(unittest.TestCase):
     def test_make_pad_mask(self):
         res = make_pad_mask(self.lengths)
+        res1 = make_non_pad_mask(self.lengths).logical_not()
         self.assertSequenceEqual(res.numpy().tolist(), self.pad_masks.tolist())
+        self.assertSequenceEqual(res.numpy().tolist(), res1.tolist())


 if __name__ == '__main__':
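The added assertion pins down the relationship between the two mask helpers: `make_pad_mask` must be exactly the element-wise negation of `make_non_pad_mask`. A small numpy reference sketch of that property follows; the project's real implementations operate on paddle tensors, so this only illustrates the expected semantics, assuming `True` marks padded positions:

import numpy as np

def make_pad_mask(lengths, max_len=None):
    # True at padded positions, shape (batch, max_len).
    lengths = np.asarray(lengths)
    max_len = max_len or int(lengths.max())
    positions = np.arange(max_len)[None, :]   # (1, T)
    return positions >= lengths[:, None]      # (B, T)

def make_non_pad_mask(lengths, max_len=None):
    # True at real (non-padded) positions: the logical negation.
    return ~make_pad_mask(lengths, max_len)

lengths = [5, 3, 2]
assert (make_pad_mask(lengths) == ~make_non_pad_mask(lengths)).all()
print(make_pad_mask(lengths).astype(int))
# [[0 0 0 0 0]
#  [0 0 0 1 1]
#  [0 0 1 1 1]]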
