diff --git a/audio/setup.py b/audio/setup.py index ffee6f9d..ec67c81d 100644 --- a/audio/setup.py +++ b/audio/setup.py @@ -19,7 +19,7 @@ from setuptools.command.install import install from setuptools.command.test import test # set the version here -VERSION = '1.0.0a' +VERSION = '0.0.0' # Inspired by the example at https://pytest.org/latest/goodpractises.html diff --git a/docs/source/released_model.md b/docs/source/released_model.md index aee44859..61a3eb26 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -6,8 +6,10 @@ ### Speech Recognition Model Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech | Example Link :-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----: | :-----: | :-----: -[Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers with only forward direction | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) +[Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM), 0.053 (aishell, w/ LM) |-| 10000 h |- +[Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_aishell_ckpt_0.1.1.model.tar.gz)| Aishell Dataset | Char-based | 306 MB | 2 Conv + 3 bidirectional GRU layers| 0.064 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) +[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) |-| 10000 h |- [Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.0544 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0464 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1) diff --git a/examples/wenetspeech/asr0/RESULTS.md b/examples/wenetspeech/asr0/RESULTS.md new file mode 100644 index 00000000..17c0b9e5 --- /dev/null +++ b/examples/wenetspeech/asr0/RESULTS.md @@ -0,0 +1,7 @@ +# Wenetspeech + +## Deepspeech2 Streaming + +| Model | Number of Params | Release | Config | Test set | Valid Loss | CER | +| --- | --- | --- | --- | --- | --- | --- | +| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161 | test\_net | 13.307 | 15.02 | diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md index 5c2b8143..2b03b3f2 100644 --- a/examples/wenetspeech/asr1/RESULTS.md +++ b/examples/wenetspeech/asr1/RESULTS.md @@ -1,9 +1,19 @@ # WenetSpeech +## Conformer online + +| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER | +| --- | --- | --- | --- | --- | --- | --- | --- | +| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention | 9.329 | 0.1102 | +| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_greedy_search | 9.329 | 0.1207 | +| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | ctc_prefix_beam_search | 9.329 | 0.1203 | +| conformer | 123.47 M | conf/chunk_conformer.yaml | spec_aug | test net | attention_rescoring | 9.329 | 0.1100 | + + ## Conformer -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | +| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER | | --- | --- | --- | --- | --- | --- | --- | --- | | conformer | 32.52 M | conf/conformer.yaml | spec_aug | dev | attention | | | | conformer | 32.52 M | conf/conformer.yaml | spec_aug | test net | ctc_greedy_search | | | @@ -16,7 +26,7 @@ Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wenetspeech/20211025_conformer_exp.tar.gz -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | +| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER | | --- | --- | --- | --- | --- | --- | --- | --- | | conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention | - | 0.048456 | | conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_greedy_search | - | 0.052534 | diff --git a/setup.py b/setup.py index 8cd687d8..ad353d42 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ from setuptools.command.install import install HERE = Path(os.path.abspath(os.path.dirname(__file__))) -VERSION = '1.0.0a' +VERSION = '0.0.0' COMMITID = 'none' base = [