From 4d7046d2441a13d2f30100dba5fe172aed52c0c8 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 7 May 2022 02:26:48 +0000 Subject: [PATCH 1/2] updata released model info, test=doc --- docs/source/released_model.md | 4 ++-- examples/wenetspeech/asr0/RESULTS.md | 1 + examples/wenetspeech/asr1/RESULTS.md | 14 ++++++++------ paddlespeech/cli/asr/pretrained_models.py | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 61a3eb26..435985bc 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -6,10 +6,10 @@ ### Speech Recognition Model Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech | Example Link :-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----: | :-----: | :-----: -[Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM), 0.053 (aishell, w/ LM) |-| 10000 h |- +[Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM)
0.2417 (test\_meeting, w/o LM)
0.053 (aishell, w/ LM) |-| 10000 h |- [Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_aishell_ckpt_0.1.1.model.tar.gz)| Aishell Dataset | Char-based | 306 MB | 2 Conv + 3 bidirectional GRU layers| 0.064 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) -[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) |-| 10000 h |- +[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 18.79 (test\_meeting) |-| 10000 h |- [Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.0544 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0464 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) diff --git a/examples/wenetspeech/asr0/RESULTS.md b/examples/wenetspeech/asr0/RESULTS.md index 17c0b9e5..0566a352 100644 --- a/examples/wenetspeech/asr0/RESULTS.md +++ b/examples/wenetspeech/asr0/RESULTS.md @@ -5,3 +5,4 @@ | Model | Number of Params | Release | Config | Test set | Valid Loss | CER | | --- | --- | --- | --- | --- | --- | --- | | DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161 | test\_net | 13.307 | 15.02 | +| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161 | test\_meeting | 13.307 | 24.17 | diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md index 2b03b3f2..72b815b7 100644 --- a/examples/wenetspeech/asr1/RESULTS.md +++ b/examples/wenetspeech/asr1/RESULTS.md @@ -4,12 +4,14 @@ | Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER | | --- | --- | --- | --- | --- | --- | --- | --- | -| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention | 9.329 | 0.1102 | -| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_greedy_search | 9.329 | 0.1207 | -| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_prefix_beam_search | 9.329 | 0.1203 | -| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention_rescoring | 9.329 | 0.1100 | - - +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention | 9.329 | 0.1102 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_greedy_search | 9.329 | 0.1207 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_prefix_beam_search | 9.329 | 0.1203 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention_rescoring | 9.329 | 0.1100 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test meeting | attention | 9.329 | 0.1992 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test meeting | ctc_greedy_search | 9.329 | 0.1960 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test meeting | ctc_prefix_beam_search | 9.329 | 0.1946 | +| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test meeting | attention_rescoring | 9.329 | 0.1879| ## Conformer diff --git a/paddlespeech/cli/asr/pretrained_models.py b/paddlespeech/cli/asr/pretrained_models.py index 7f198ad6..0f521884 100644 --- a/paddlespeech/cli/asr/pretrained_models.py +++ b/paddlespeech/cli/asr/pretrained_models.py @@ -27,7 +27,7 @@ pretrained_models = { 'ckpt_path': 'exp/conformer/checkpoints/wenetspeech', }, - "conformer_online_wenetspeech-zh-16k": { + "conformer_online_wenetspeech-zh-16k": { 'url': 'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz', 'md5': From 9947380898cb7f1aa043d3cdf8015892a09aa9c9 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 7 May 2022 07:56:46 +0000 Subject: [PATCH 2/2] fix the doc, test=doc --- docs/source/released_model.md | 2 +- examples/wenetspeech/asr0/RESULTS.md | 4 ++-- examples/wenetspeech/asr1/RESULTS.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 435985bc..74435ae1 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -9,7 +9,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | [Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM)
0.2417 (test\_meeting, w/o LM)
0.053 (aishell, w/ LM) |-| 10000 h |- [Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_aishell_ckpt_0.1.1.model.tar.gz)| Aishell Dataset | Char-based | 306 MB | 2 Conv + 3 bidirectional GRU layers| 0.064 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) -[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 18.79 (test\_meeting) |-| 10000 h |- +[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 0.1879 (test\_meeting) |-| 10000 h |- [Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.0544 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0464 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) diff --git a/examples/wenetspeech/asr0/RESULTS.md b/examples/wenetspeech/asr0/RESULTS.md index 0566a352..0796b7bc 100644 --- a/examples/wenetspeech/asr0/RESULTS.md +++ b/examples/wenetspeech/asr0/RESULTS.md @@ -4,5 +4,5 @@ | Model | Number of Params | Release | Config | Test set | Valid Loss | CER | | --- | --- | --- | --- | --- | --- | --- | -| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161 | test\_net | 13.307 | 15.02 | -| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161 | test\_meeting | 13.307 | 24.17 | +| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161, w/o LM | test\_net | 13.307 | 15.02 | +| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161, w/o LM | test\_meeting | 13.307 | 24.17 | diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md index 72b815b7..2f7ad659 100644 --- a/examples/wenetspeech/asr1/RESULTS.md +++ b/examples/wenetspeech/asr1/RESULTS.md @@ -2,7 +2,7 @@ ## Conformer online -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER | +| Model | Params | Config | Augmentation| Test set | Decode method | Valid Loss | CER | | --- | --- | --- | --- | --- | --- | --- | --- | | conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention | 9.329 | 0.1102 | | conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_greedy_search | 9.329 | 0.1207 |